ygg 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
ygg-0.1.47.dist-info/METADATA → ygg-0.1.49.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.47
+Version: 0.1.49
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
 
 # Yggdrasil (Python)
@@ -270,34 +272,6 @@ Extras are grouped by engine:
 - `.[polars]`, `.[pandas]`, `.[spark]`, `.[databricks]` – install only the integrations you need.
 - `.[dev]` – adds testing, linting, and typing tools (`pytest`, `ruff`, `black`, `mypy`).
 
-## Quickstart
-Define an Arrow-aware dataclass, coerce inputs, and cast across containers:
-
-```python
-from yggdrasil import yggdataclass
-from yggdrasil.types.cast import convert
-from yggdrasil.types import arrow_field_from_hint
-
-@yggdataclass
-class User:
-    id: int
-    email: str
-    active: bool = True
-
-user = User.__safe_init__("123", email="alice@example.com")
-assert user.id == 123 and user.active is True
-
-payload = {"id": "45", "email": "bob@example.com", "active": "false"}
-clean = User.from_dict(payload)
-print(clean.to_dict())
-
-field = arrow_field_from_hint(User, name="user")
-print(field) # user: struct<id: int64, email: string, active: bool>
-
-numbers = convert(["1", "2", "3"], list[int])
-print(numbers)
-```
-
 ### Databricks example
 Install the `databricks` extra and run SQL with typed results:
 
ygg-0.1.47.dist-info/RECORD → ygg-0.1.49.dist-info/RECORD
@@ -1,28 +1,28 @@
-ygg-0.1.47.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-yggdrasil/__init__.py,sha256=PfH7Xwt6uue6oqe6S5V8NhDJcVQClkKrBE1KXhdelZc,117
-yggdrasil/version.py,sha256=TPABAGTrArQ_wApv_9aI4NXV4UtQ6uCrBrlNiCowSWI,22
+ygg-0.1.49.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
+yggdrasil/version.py,sha256=pnii9XXudF0U50FobVvNgNzGy9lA9q_DntGQAvyqaFA,22
 yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
 yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
-yggdrasil/databricks/compute/cluster.py,sha256=HI9811oBCpWeo4V921FVAlRUXKXM4XO7HS9DQVOuzpM,41340
-yggdrasil/databricks/compute/execution_context.py,sha256=anOxfNms83dZ5FTknbfT8uj889LjheMqEx9W5NtJC9E,23094
-yggdrasil/databricks/compute/remote.py,sha256=nEN_Fr1Ouul_iKOf4B5QjEGscYAcl7nHjGsl2toRzrU,2874
+yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
+yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
+yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
 yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
 yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
-yggdrasil/databricks/sql/__init__.py,sha256=y1n5yg-drZ8QVZbEgznsRG24kdJSnFis9l2YfYCsaCM,234
+yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
 yggdrasil/databricks/sql/engine.py,sha256=K5WmGKpXU78JA3UdK8dLxBD_GXKidZJFe7hytuC5UHg,41029
-yggdrasil/databricks/sql/exceptions.py,sha256=Jqd_gT_VyPL8klJEHYEzpv5eHtmdY43WiQ7HZBaEqSk,53
-yggdrasil/databricks/sql/statement_result.py,sha256=_mBolHae0AASfe1Tlld1KTXs-K4-oy9dniHDyR2ILYc,16736
+yggdrasil/databricks/sql/exceptions.py,sha256=uC-BoG0u0LtORKUS1X3iLID8nc-0TV5MQN3M8RXHsO4,1495
+yggdrasil/databricks/sql/statement_result.py,sha256=kMBvpwyRv3_JUZSvxMS0c9Vqlh6LtCRJvXsDpu9RIAs,16137
 yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
 yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
-yggdrasil/databricks/workspaces/__init__.py,sha256=Ti1I99JTC3koYJaCy8WYvkAox4KdcuMRk8b2rHroWCY,133
+yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
 yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
-yggdrasil/databricks/workspaces/io.py,sha256=Tdde4LaGNJNT50R11OkEYZyNacyIW9QrOXMAicAlIr4,32208
-yggdrasil/databricks/workspaces/path.py,sha256=-XnCD9p42who3DAwnITVE1KyrZUSoXDKHA8iZi-7wk4,47743
+yggdrasil/databricks/workspaces/io.py,sha256=CDq9NsYFjlSJ1QbKFlfWvZLQPVoWyZ4b3XR_lxNPcZE,32776
+yggdrasil/databricks/workspaces/path.py,sha256=BxDwxE7q1-NLKEZQT4xLM3LeCeQKO3wUy7R-Ce-cSMk,47875
 yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
-yggdrasil/databricks/workspaces/workspace.py,sha256=c6CBBun2BskEnsP74pbLVOe_TKXZs4L4r4gPQtIzlQE,23821
-yggdrasil/dataclasses/__init__.py,sha256=6SdfIyTsoM4AuVw5TW4Q-UWXz41EyfsMcpD30cmjbSM,125
-yggdrasil/dataclasses/dataclass.py,sha256=fKokFUnqe4CmXXGMTdF4XDWbCUl_c_-se-UD48L5s1E,6594
-yggdrasil/libs/__init__.py,sha256=ulzk-ZkFUI2Pfo93YKtO8MBsEWtRZzLos7HAxN74R0w,168
+yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
+yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
+yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
+yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
 yggdrasil/libs/databrickslib.py,sha256=NHJeUViHhZc8LI5oDVfi1axRyUy_pDJLy4hjD0KZEBQ,980
 yggdrasil/libs/pandaslib.py,sha256=Edm3SXgvr8qe2wsojuRvD1ewNB-Sff0RWoTqaddVruI,509
 yggdrasil/libs/polarslib.py,sha256=7EWP5iS8F9cW79M6d8Yg5ysjnOY3w4_k7TW-5DCRACw,511
@@ -37,18 +37,17 @@ yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWg
 yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
 yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
 yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
-yggdrasil/pyutils/python_env.py,sha256=tuglnjdqHQjNh18qDladVoSEOjCD0RcnMEPYJ0tArOs,50985
+yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
 yggdrasil/pyutils/retry.py,sha256=n5sr-Zu7fYrdLbjJ4WifK2lk0gEGmHv5FYt2HaCm1Qc,11916
-yggdrasil/requests/__init__.py,sha256=wNkP5INH-SshEBZ1MTlqP2yAMLxo5BQK5B5oHEw3FkI,140
+yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
 yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
 yggdrasil/requests/session.py,sha256=SLnrgHY0Lby7ZxclRFUjHdfM8euN_8bSQEWl7TkJY2U,1461
 yggdrasil/types/__init__.py,sha256=CrLiDeYNM9fO975sE5ufeVKcy7Ca702IsaG2Pk8T3YU,139
-yggdrasil/types/libs.py,sha256=2iRT9JDUdr9seuGz9ZR3wWdrxZ8LRnc9i-m_tkKdKgI,293
 yggdrasil/types/python_arrow.py,sha256=mOhyecAxa5u8JWsyTO26OMOWimHHgwLKWlkNSAyIVas,25636
 yggdrasil/types/python_defaults.py,sha256=GO3hZBZcwRHs9qiXes75y8l5X00kZHTfEC7el_x73uw,10184
 yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdafy_I,311
 yggdrasil/types/cast/arrow_cast.py,sha256=_OMYc4t5GlgE4ztlWaCoK8Jnba09rgDbmHVP-QXhOL0,41523
-yggdrasil/types/cast/cast_options.py,sha256=iVfZIp6XR85yxftQXp_oGVdrZ3k6Mijt0doJEBRmNTY,15527
+yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
 yggdrasil/types/cast/pandas_cast.py,sha256=I3xu0sZ59ZbK3NDcQ2dslzdeKzhpFV5zR02ZEixd5hI,8713
 yggdrasil/types/cast/polars_cast.py,sha256=K2nnQ7bexArneYEhUPgV_6er4JNq6N5RmbMUhw-2_Xw,28766
 yggdrasil/types/cast/polars_pandas_cast.py,sha256=CS0P7teVv15IdX5g7v40RfkH1VMg6b-HM0V_gOfacm8,5071
@@ -56,8 +55,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
 yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
 yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.47.dist-info/METADATA,sha256=gX37uRIIu4qh1VRItyOxOZK1_WNVvfxX6gBbb5DCN0Q,19204
-ygg-0.1.47.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ygg-0.1.47.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
-ygg-0.1.47.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
-ygg-0.1.47.dist-info/RECORD,,
+ygg-0.1.49.dist-info/METADATA,sha256=CHTqeVyiYa1868ZDwISDHKyXYxPeUH0mHhvHLYYoDbg,18528
+ygg-0.1.49.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ygg-0.1.49.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.49.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.49.dist-info/RECORD,,
yggdrasil/__init__.py CHANGED
@@ -1,5 +1 @@
 from .version import *
-from .dataclasses import yggdataclass
-from .types import convert
-from .pyutils import *
-
yggdrasil/databricks/compute/cluster.py CHANGED
@@ -22,8 +22,8 @@ from typing import Any, Iterator, Optional, Union, List, Callable, Dict, ClassVa
 
 from .execution_context import ExecutionContext
 from ..workspaces.workspace import WorkspaceService, Workspace
-from ... import CallableSerde
 from ...libs.databrickslib import databricks_sdk
+from ...pyutils.callable_serde import CallableSerde
 from ...pyutils.equality import dicts_equal, dict_diff
 from ...pyutils.expiring_dict import ExpiringDict
 from ...pyutils.modules import PipIndexSettings
@@ -36,7 +36,8 @@ else: # pragma: no cover - runtime fallback when SDK is missing
     from databricks.sdk.errors import DatabricksError
     from databricks.sdk.errors.platform import ResourceDoesNotExist
     from databricks.sdk.service.compute import (
-        ClusterDetails, Language, Kind, State, DataSecurityMode, Library, PythonPyPiLibrary, LibraryInstallStatus
+        ClusterDetails, Language, Kind, State, DataSecurityMode, Library, PythonPyPiLibrary, LibraryInstallStatus,
+        ClusterAccessControlRequest, ClusterPermissionLevel
     )
     from databricks.sdk.service.compute import SparkVersion, RuntimeEngine
 
@@ -143,6 +144,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = None,
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create or reuse a cluster that mirrors the current Python environment.
@@ -151,9 +153,10 @@ class Cluster(WorkspaceService):
            workspace: Workspace to use for the cluster.
            cluster_id: Optional cluster id to reuse.
            cluster_name: Optional cluster name to reuse.
-           single_user_name: Optional user name for single-user clusters.
+           single_user_name: Optional username for single-user clusters.
            runtime_engine: Optional Databricks runtime engine.
            libraries: Optional list of libraries to install.
+           update_timeout: wait timeout, if None it will not wait completion
            **kwargs: Additional cluster specification overrides.
 
        Returns:
@@ -175,6 +178,7 @@ class Cluster(WorkspaceService):
                single_user_name=single_user_name,
                runtime_engine=runtime_engine,
                libraries=libraries,
+               update_timeout=update_timeout,
                **kwargs
            )
        )
@@ -189,6 +193,7 @@ class Cluster(WorkspaceService):
        single_user_name: Optional[str] = "current",
        runtime_engine: Optional["RuntimeEngine"] = None,
        libraries: Optional[list[str]] = None,
+       update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
        **kwargs
    ) -> "Cluster":
        """Create/update a cluster to match the local Python environment.
@@ -197,9 +202,10 @@ class Cluster(WorkspaceService):
            source: Optional PythonEnv to mirror (defaults to current).
            cluster_id: Optional cluster id to update.
            cluster_name: Optional cluster name to update.
-           single_user_name: Optional single user name for the cluster.
+           single_user_name: Optional single username for the cluster.
            runtime_engine: Optional runtime engine selection.
            libraries: Optional list of libraries to install.
+           update_timeout: wait timeout, if None it will not wait completion
            **kwargs: Additional cluster specification overrides.
 
        Returns:
@@ -241,6 +247,7 @@ class Cluster(WorkspaceService):
            single_user_name=single_user_name,
            runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
            libraries=libraries,
+           update_timeout=update_timeout,
            **kwargs
        )
 
@@ -379,7 +386,9 @@ class Cluster(WorkspaceService):
        start = time.time()
        sleep_time = tick
 
-       if isinstance(timeout, dt.timedelta):
+       if not timeout:
+           timeout = 20 * 60.0
+       elif isinstance(timeout, dt.timedelta):
            timeout = timeout.total_seconds()
 
        while self.is_pending:
@@ -411,12 +420,14 @@ class Cluster(WorkspaceService):
        # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
        v = self.spark_version
 
-       if v is None:
+       if not v:
            return None
 
        parts = v.split(".")
+
        if len(parts) < 2:
            return None
+
        return ".".join(parts[:2])  # e.g. "17.3"
 
    @property
@@ -427,8 +438,10 @@ class Cluster(WorkspaceService):
        When the runtime can't be mapped, returns ``None``.
        """
        v = self.runtime_version
-       if v is None:
+
+       if not v:
            return None
+
        return _PYTHON_BY_DBR.get(v)
 
    # ------------------------------------------------------------------ #
@@ -585,6 +598,7 @@ class Cluster(WorkspaceService):
        cluster_id: Optional[str] = None,
        cluster_name: Optional[str] = None,
        libraries: Optional[List[Union[str, "Library"]]] = None,
+       update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
        **cluster_spec: Any
    ):
        """Create a new cluster or update an existing one.
@@ -593,6 +607,7 @@ class Cluster(WorkspaceService):
            cluster_id: Optional cluster id to update.
            cluster_name: Optional cluster name to update or create.
            libraries: Optional libraries to install.
+           update_timeout: wait timeout, if None it will not wait completion
            **cluster_spec: Cluster specification overrides.
 
        Returns:
@@ -608,24 +623,28 @@ class Cluster(WorkspaceService):
            return found.update(
                cluster_name=cluster_name,
                libraries=libraries,
+               wait_timeout=update_timeout,
                **cluster_spec
            )
 
        return self.create(
            cluster_name=cluster_name,
            libraries=libraries,
+           wait_timeout=update_timeout,
            **cluster_spec
        )
 
    def create(
        self,
        libraries: Optional[List[Union[str, "Library"]]] = None,
+       wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
        **cluster_spec: Any
    ) -> str:
        """Create a new cluster and optionally install libraries.
 
        Args:
            libraries: Optional list of libraries to install after creation.
+           wait_timeout: wait timeout, if None it will not wait completion
            **cluster_spec: Cluster specification overrides.
 
        Returns:
@@ -645,27 +664,32 @@ class Cluster(WorkspaceService):
            update_details,
        )
 
-       self.details = self.clusters_client().create_and_wait(**update_details)
+       self.details = self.clusters_client().create(**update_details)
 
        LOGGER.info(
            "Created %s",
            self
        )
 
-       self.install_libraries(libraries=libraries, raise_error=False)
+       self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+       if wait_timeout:
+           self.wait_for_status(timeout=wait_timeout)
 
        return self
 
    def update(
        self,
        libraries: Optional[List[Union[str, "Library"]]] = None,
-       wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
+       access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
+       wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
        **cluster_spec: Any
    ) -> "Cluster":
        """Update cluster configuration and optionally install libraries.
 
        Args:
            libraries: Optional libraries to install.
+           access_control_list: List of permissions
            wait_timeout: waiting timeout until done, if None it does not wait
            **cluster_spec: Cluster specification overrides.
 
@@ -705,8 +729,9 @@ class Cluster(WorkspaceService):
            self, diff
        )
 
-       self.wait_for_status()
+       self.wait_for_status(timeout=wait_timeout)
        self.clusters_client().edit(**update_details)
+       self.update_permissions(access_control_list=access_control_list)
 
        LOGGER.info(
            "Updated %s",
@@ -718,6 +743,56 @@ class Cluster(WorkspaceService):
 
        return self
 
+   def update_permissions(
+       self,
+       access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
+   ):
+       if not access_control_list:
+           return self
+
+       access_control_list = self._check_permission(access_control_list)
+
+       self.clusters_client().update_permissions(
+           cluster_id=self.cluster_id,
+           access_control_list=access_control_list
+       )
+
+   def default_permissions(self):
+       current_groups = self.current_user.groups or []
+
+       return [
+           ClusterAccessControlRequest(
+               group_name=name,
+               permission_level=ClusterPermissionLevel.CAN_MANAGE
+           )
+           for name in current_groups
+           if name not in {"users"}
+       ]
+
+   def _check_permission(
+       self,
+       permission: Union[str, "ClusterAccessControlRequest", List[Union[str, "ClusterAccessControlRequest"]]],
+   ):
+       if isinstance(permission, ClusterAccessControlRequest):
+           return permission
+
+       if isinstance(permission, str):
+           if "@" in permission:
+               group_name, user_name = None, permission
+           else:
+               group_name, user_name = permission, None
+
+           return ClusterAccessControlRequest(
+               group_name=group_name,
+               user_name=user_name,
+               permission_level=ClusterPermissionLevel.CAN_MANAGE
+           )
+
+       return [
+           self._check_permission(_)
+           for _ in permission
+       ]
+
    def list_clusters(self) -> Iterator["Cluster"]:
        """Iterate clusters, yielding helpers annotated with metadata.
 
@@ -809,18 +884,22 @@ class Cluster(WorkspaceService):
        Returns:
            The current Cluster instance.
        """
+       if self.is_running:
+           return self
+
        self.wait_for_status()
 
-       if not self.is_running:
-           LOGGER.debug("Starting %s", self)
+       if self.is_running:
+           return self
 
-           if wait_timeout:
-               self.clusters_client().start(cluster_id=self.cluster_id)
-               self.wait_for_status(timeout=wait_timeout.total_seconds())
-           else:
-               self.clusters_client().start(cluster_id=self.cluster_id)
+       LOGGER.debug("Starting %s", self)
+
+       self.clusters_client().start(cluster_id=self.cluster_id)
 
-           LOGGER.info("Started %s", self)
+       LOGGER.info("Started %s", self)
+
+       if wait_timeout:
+           self.wait_for_status(timeout=wait_timeout.total_seconds())
 
        return self
 
@@ -836,7 +915,7 @@ class Cluster(WorkspaceService):
 
        if self.is_running:
            self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
-           return self.wait_for_status()
+           return self
 
        return self.start()
 
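Taken together, the cluster.py hunks thread a new `update_timeout` through create/update/replicate and add permission management on top of `ClusterAccessControlRequest`. A minimal usage sketch, assuming an already-authenticated `Workspace`; the host, cluster name, and principals below are illustrative, not from the package:

```python
import datetime as dt

from yggdrasil.databricks.workspaces.workspace import Workspace

ws = Workspace(host="https://example.cloud.databricks.com")  # hypothetical host

cluster = ws.clusters().replicated_current_environment(
    workspace=ws,
    cluster_name="dev-mirror",                # hypothetical cluster name
    update_timeout=dt.timedelta(minutes=10),  # None skips waiting for completion
)

# Per _check_permission above: strings containing "@" become user_name entries,
# anything else becomes group_name, both granted CAN_MANAGE.
cluster.update_permissions(
    access_control_list=["data-engineers", "alice@example.com"]
)
```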
yggdrasil/databricks/compute/execution_context.py CHANGED
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
        """
        return self.cluster.workspace.sdk()
 
-   def _create_command(
+   def create_command(
        self,
        language: "Language",
    ) -> any:
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
        Returns:
            The created command execution context response.
        """
-       self.cluster.ensure_running()
-
        LOGGER.debug(
            "Creating Databricks command execution context for %s",
            self.cluster
        )
 
-       created = self._workspace_client().command_execution.create_and_wait(
-           cluster_id=self.cluster.cluster_id,
-           language=language,
+       try:
+           created = self._workspace_client().command_execution.create_and_wait(
+               cluster_id=self.cluster.cluster_id,
+               language=language,
+           )
+       except:
+           self.cluster.ensure_running()
+
+           created = self._workspace_client().command_execution.create_and_wait(
+               cluster_id=self.cluster.cluster_id,
+               language=language,
+           )
+
+       LOGGER.info(
+           "Created Databricks command execution context %s",
+           self
        )
+
        created = getattr(created, "response", created)
 
        return created
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
            The connected ExecutionContext instance.
        """
        if self.context_id is not None:
-           LOGGER.debug(
-               "Execution context already open for %s",
-               self
-           )
            return self
 
        self.language = language or self.language
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
        if self.language is None:
            self.language = Language.PYTHON
 
-       ctx = self._create_command(language=self.language)
+       ctx = self.create_command(language=self.language)
 
        context_id = ctx.id
        if not context_id:
yggdrasil/databricks/compute/remote.py CHANGED
@@ -39,6 +39,7 @@ def databricks_remote_compute(
    timeout: Optional[dt.timedelta] = None,
    env_keys: Optional[List[str]] = None,
    force_local: bool = False,
+   update_timeout: Optional[Union[float, dt.timedelta]] = None,
    **options
) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
    """Return a decorator that executes functions on a remote cluster.
@@ -52,6 +53,7 @@ def databricks_remote_compute(
        timeout: Optional execution timeout for remote calls.
        env_keys: Optional environment variable names to forward.
        force_local: Force local execution
+       update_timeout: creation or update wait timeout
        **options: Extra options forwarded to the execution decorator.
 
    Returns:
@@ -82,7 +84,8 @@ def databricks_remote_compute(
        cluster = workspace.clusters().replicated_current_environment(
            workspace=workspace,
            cluster_name=cluster_name,
-           single_user_name=workspace.current_user.user_name
+           single_user_name=workspace.current_user.user_name,
+           update_timeout=update_timeout
        )
 
        cluster.ensure_running(wait_timeout=None)
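A sketch of the decorator with the new knob, assuming `cluster_name` is accepted as suggested by the wrapper above (the decorated function and names are hypothetical):

```python
import datetime as dt

from yggdrasil.databricks.compute.remote import databricks_remote_compute

@databricks_remote_compute(
    cluster_name="dev-mirror",                # hypothetical cluster name
    update_timeout=dt.timedelta(minutes=10),  # forwarded to replicated_current_environment
)
def double(n: int) -> int:
    # Body runs on the remote cluster unless force_local is set.
    return n * 2
```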
yggdrasil/databricks/sql/__init__.py CHANGED
@@ -1,9 +1,8 @@
 """Databricks SQL helpers and engine wrappers."""
 
 from .engine import SQLEngine, StatementResult
+from .exceptions import SqlStatementError
 
 # Backwards compatibility
 DBXSQL = SQLEngine
 DBXStatementResult = StatementResult
-
-__all__ = ["SQLEngine", "StatementResult"]
yggdrasil/databricks/sql/exceptions.py CHANGED
@@ -1 +1,45 @@
 """Custom exceptions for Databricks SQL helpers."""
+from dataclasses import dataclass
+from typing import Optional, Any
+
+__all__ = [
+    "SqlStatementError"
+]
+
+
+@dataclass(frozen=True)
+class SqlStatementError(RuntimeError):
+    statement_id: str
+    state: str
+    message: str
+    error_code: Optional[str] = None
+    sql_state: Optional[str] = None
+
+    def __str__(self) -> str:
+        meta = []
+        if self.error_code:
+            meta.append(f"code={self.error_code}")
+        if self.sql_state:
+            meta.append(f"state={self.sql_state}")
+
+        meta_str = f" ({', '.join(meta)})" if meta else ""
+        return f"SQL statement {self.statement_id} failed [{self.state}]: {self.message}{meta_str}"
+
+    @classmethod
+    def from_statement(cls, stmt: Any) -> "SqlStatementError":
+        statement_id = getattr(stmt, "statement_id", "<unknown>")
+        state = getattr(stmt, "state", "<unknown>")
+
+        err = getattr(getattr(stmt, "status", None), "error", None)
+
+        message = getattr(err, "message", None) or "Unknown SQL error"
+        error_code = getattr(err, "error_code", None)
+        sql_state = getattr(err, "sql_state", None)
+
+        return cls(
+            statement_id=str(statement_id),
+            state=str(state),
+            message=str(message),
+            error_code=str(error_code) if error_code is not None else None,
+            sql_state=str(sql_state) if sql_state is not None else None,
+        )
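Since `raise_for_status` now raises this type (see the statement_result.py hunks below) and the class is re-exported from `yggdrasil.databricks.sql`, callers can switch from parsing `ValueError` text to catching structured fields. A sketch, with construction of the `StatementResult` elided:

```python
from yggdrasil.databricks.sql import SqlStatementError

def wait_checked(result):
    # `result` is assumed to be a StatementResult obtained elsewhere.
    try:
        return result.wait()  # wait() ends by calling raise_for_status()
    except SqlStatementError as exc:
        # Frozen dataclass fields carry the structured error details.
        print(exc.statement_id, exc.state, exc.error_code, exc.sql_state)
        raise
```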
yggdrasil/databricks/sql/statement_result.py CHANGED
@@ -9,6 +9,7 @@ from typing import Optional, Iterator, TYPE_CHECKING
 import pyarrow as pa
 import pyarrow.ipc as pipc
 
+from .exceptions import SqlStatementError
 from .types import column_info_to_arrow_field
 from ...libs.databrickslib import databricks_sdk
 from ...libs.pandaslib import pandas
@@ -32,9 +33,7 @@ except ImportError:
 if databricks_sdk is not None:
     from databricks.sdk.service.sql import (
         StatementState, StatementResponse, Disposition, StatementStatus
-    )
-
-    StatementResponse = StatementResponse
+    )
 else:
     class StatementResponse:
         pass
@@ -299,28 +298,8 @@ class StatementResult:
        )
 
    def raise_for_status(self):
-       """Raise a ValueError if the statement failed.
-
-       Returns:
-           None.
-       """
        if self.failed:
-           # grab error info if present
-           err = self.status.error
-           message = err.message or "Unknown SQL error"
-           error_code = err.error_code
-           sql_state = getattr(err, "sql_state", None)
-
-           parts = [message]
-           if error_code:
-               parts.append(f"error_code={error_code}")
-           if sql_state:
-               parts.append(f"sql_state={sql_state}")
-
-           raise ValueError(
-               f"Statement {self.statement_id} {self.state}: " + " | ".join(parts)
-           )
-
+           raise SqlStatementError.from_statement(self)
        return self
 
    def wait(
@@ -337,22 +316,20 @@ class StatementResult:
        Returns:
            The current StatementResult instance.
        """
-       if self.done:
-           return self
-
-       start = time.time()
-       poll_interval = poll_interval or 1
-
-       while not self.done:
-           # still running / queued / pending
-           if timeout is not None and (time.time() - start) > timeout:
-               raise TimeoutError(
-                   f"Statement {self.statement_id} did not finish within {timeout} seconds "
-                   f"(last state={self.state})"
-               )
-
-           poll_interval = max(10, poll_interval * 1.2)
-           time.sleep(poll_interval)
+       if not self.done:
+           start = time.time()
+           poll_interval = poll_interval or 1
+
+           while not self.done:
+               # still running / queued / pending
+               if timeout is not None and (time.time() - start) > timeout:
+                   raise TimeoutError(
+                       f"Statement {self.statement_id} did not finish within {timeout} seconds "
+                       f"(last state={self.state})"
+                   )
+
+               poll_interval = max(10, poll_interval * 1.2)
+               time.sleep(poll_interval)
 
        self.raise_for_status()
 
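Note the polling schedule this loop produces: `max(10, poll_interval * 1.2)` jumps from the 1-second seed straight to 10 seconds on the first pass, then grows 20% per poll. A standalone illustration of that arithmetic:

```python
# Reproduces the wait() backoff arithmetic above, outside the class.
poll_interval = 1.0
schedule = []
for _ in range(5):
    poll_interval = max(10, poll_interval * 1.2)
    schedule.append(round(poll_interval, 2))
print(schedule)  # [10, 12.0, 14.4, 17.28, 20.74]
```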
yggdrasil/databricks/workspaces/__init__.py CHANGED
@@ -2,4 +2,3 @@
 
 from .workspace import *
 from .path import *
-from .io import *
yggdrasil/databricks/workspaces/io.py CHANGED
@@ -975,28 +975,40 @@ class DatabricksVolumeIO(DatabricksIO):
        """Read bytes from a volume file.
 
        Args:
-           start: Starting byte offset.
+           start: Starting byte offset (0-based).
            length: Number of bytes to read.
            allow_not_found: Whether to suppress missing-path errors.
 
        Returns:
            Bytes read from the file.
        """
-       if length == 0:
+       if length <= 0:
            return b""
+       if start < 0:
+           raise ValueError(f"start must be >= 0, got {start}")
+       if length < 0:
+           raise ValueError(f"length must be >= 0, got {length}")
 
        sdk = self.workspace.sdk()
        client = sdk.files
        full_path = self.path.files_full_path()
 
-       resp = client.download(full_path)
-       result = (
-           resp.contents
-           .seek(start, io.SEEK_SET)
-           .read(length)
-       )
+       try:
+           resp = client.download(full_path)
+       except Exception as e:
+           # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
+           if allow_not_found and any(s in str(e).lower() for s in ("not found", "does not exist", "404")):
+               return b""
+           raise
+
+       data = resp.contents.read()
 
-       return result
+       # If start is past EOF, return empty (common file-like behavior).
+       if start >= len(data):
+           return b""
+
+       end = start + length
+       return data[start:end]
 
    def write_all_bytes(self, data: bytes):
        """Write bytes to a volume file.
yggdrasil/databricks/workspaces/path.py CHANGED
@@ -494,11 +494,15 @@ class DatabricksPath:
 
        try:
            info = sdk.files.get_directory_metadata(full_path)
-           mtime = (
-               dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
-               if info.last_modified
-               else None
-           )
+
+           if info is None:
+               mtime = dt.datetime.now(tz=dt.timezone.utc)
+           else:
+               mtime = (
+                   dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
+                   if info.last_modified
+                   else None
+               )
 
            return self.reset_metadata(is_file=False, is_dir=True, size=info, mtime=mtime)
        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
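For reference, the `strptime` pattern retained above parses RFC 1123-style timestamps; a small standalone check (the timestamp value is illustrative):

```python
import datetime as dt

stamp = "Tue, 04 Mar 2025 10:15:30 GMT"
mtime = dt.datetime.strptime(stamp, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
print(mtime.isoformat())  # 2025-03-04T10:15:30+00:00
```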
yggdrasil/databricks/workspaces/workspace.py CHANGED
@@ -8,7 +8,6 @@ from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
 from typing import (
-    Any,
     BinaryIO,
     Iterator,
     Optional,
@@ -55,7 +54,9 @@ def _get_env_product_version():
    v = os.getenv("DATABRICKS_PRODUCT_VERSION")
 
    if not v:
-       return YGGDRASIL_VERSION
+       if _get_env_product() == "yggdrasil":
+           return YGGDRASIL_VERSION
+       return None
    return v.strip().lower()
 
 
@@ -106,11 +107,12 @@ class Workspace:
    product: Optional[str] = dataclasses.field(default_factory=_get_env_product, repr=False)
    product_version: Optional[str] = dataclasses.field(default_factory=_get_env_product_version, repr=False)
    product_tag: Optional[str] = dataclasses.field(default_factory=_get_env_product_tag, repr=False)
+   custom_tags: Optional[dict] = dataclasses.field(default=None, repr=False)
 
    # Runtime cache (never serialized)
-   _sdk: Any = dataclasses.field(init=False, default=None, repr=False, compare=False, hash=False)
-   _was_connected: bool = dataclasses.field(init=False, default=False, repr=False, compare=False)
-   _cached_token: Optional[str] = dataclasses.field(init=False, default=None, repr=False, compare=False)
+   _sdk: Optional["WorkspaceClient"] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+   _was_connected: bool = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+   _cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
 
    # -------------------------
    # Pickle support
@@ -175,19 +177,43 @@ class Workspace:
    # -------------------------
    def clone_instance(
        self,
-       **kwargs
    ) -> "Workspace":
        """Clone the workspace config with overrides.
 
-       Args:
-           **kwargs: Field overrides for the clone.
-
        Returns:
            A new Workspace instance with updated fields.
        """
-       state = self.__getstate__()
-       state.update(kwargs)
-       return Workspace().__setstate__(state)
+       return Workspace(
+           host = self.host,
+           account_id = self.account_id,
+           token = self.token,
+           client_id = self.client_id,
+           client_secret = self.client_secret,
+           token_audience = self.token_audience,
+           azure_workspace_resource_id = self.azure_workspace_resource_id,
+           azure_use_msi = self.azure_use_msi,
+           azure_client_secret = self.azure_client_secret,
+           azure_client_id = self.azure_client_id,
+           azure_tenant_id = self.azure_tenant_id,
+           azure_environment = self.azure_environment,
+           google_credentials = self.google_credentials,
+           google_service_account = self.google_service_account,
+           profile = self.profile,
+           config_file = self.config_file,
+           auth_type = self.auth_type,
+           http_timeout_seconds = self.http_timeout_seconds,
+           retry_timeout_seconds = self.retry_timeout_seconds,
+           debug_truncate_bytes = self.debug_truncate_bytes,
+           debug_headers = self.debug_headers,
+           rate_limit = self.rate_limit,
+           product = self.product,
+           product_version = self.product_version,
+           product_tag = self.product_tag,
+           custom_tags = self.custom_tags,
+           _sdk = self._sdk,
+           _was_connected = self._was_connected,
+           _cached_token = self._cached_token,
+       )
 
    # -------------------------
    # SDK connection
@@ -300,8 +326,9 @@ class Workspace:
        Drop the cached WorkspaceClient (no actual close needed, but this
        avoids reusing stale config).
        """
-       self._sdk = None
-       self._was_connected = False
+       if self._sdk is not None:
+           self._sdk = None
+           self._was_connected = False
 
    # ------------------------------------------------------------------ #
    # Properties
@@ -561,28 +588,19 @@ class Workspace:
        Returns:
            A dict of default tags.
        """
-       return {
+       base = {
            k: v
            for k, v in (
                ("Product", self.product),
-               ("ProductVersion", self.product_version),
                ("ProductTag", self.product_tag),
-               ("ProductUser", self.current_user.user_name)
            )
            if v
        }
 
-   def merge_tags(self, existing: dict | None = None):
-       """Merge default tags with an existing set.
-
-       Args:
-           existing: Optional existing tags.
+       if self.custom_tags:
+           base.update(self.custom_tags)
 
-       Returns:
-           A dict of merged tags.
-       """
-       if existing:
-           return self.default_tags()
+       return base
 
    def sql(
        self,
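With `merge_tags` removed, `default_tags` now folds the new `custom_tags` field in directly. A sketch of the resulting behavior (host and tag values are illustrative):

```python
from yggdrasil.databricks.workspaces.workspace import Workspace

ws = Workspace(
    host="https://example.cloud.databricks.com",  # hypothetical host
    custom_tags={"CostCenter": "1234"},
)

# Per the hunk above: truthy "Product"/"ProductTag" entries come first,
# then custom_tags are layered on top (overriding on key collisions).
print(ws.default_tags())
```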
yggdrasil/dataclasses/__init__.py CHANGED
@@ -1,5 +1,3 @@
 """Enhanced dataclass helpers with Arrow awareness."""
 
-from .dataclass import yggdataclass
-
-__all__ = ["yggdataclass"]
+from .dataclass import get_dataclass_arrow_field
yggdrasil/dataclasses/dataclass.py CHANGED
@@ -2,32 +2,17 @@
 
 import dataclasses
 from inspect import isclass
-from typing import Any, Iterable, Mapping, Tuple
+from typing import Any
 
 import pyarrow as pa
 
 __all__ = [
-    "yggdataclass",
-    "is_yggdataclass",
     "get_dataclass_arrow_field"
 ]
 
 DATACLASS_ARROW_FIELD_CACHE: dict[type, pa.Field] = {}
 
 
-def is_yggdataclass(cls_or_instance: Any) -> bool:
-    """Check if a class or instance is a yggdrasil dataclass.
-
-    Args:
-        cls_or_instance: The class or instance to check.
-
-    Returns:
-        True if the class or instance
-        is a yggdrasil dataclass, False otherwise.
-    """
-    return hasattr(cls_or_instance, "__arrow_field__")
-
-
 def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
     """Return a cached Arrow Field describing the dataclass type.
 
@@ -37,9 +22,6 @@ def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
     Returns:
         Arrow field describing the dataclass schema.
     """
-    if is_yggdataclass(cls_or_instance):
-        return cls_or_instance.__arrow_field__()
-
     if dataclasses.is_dataclass(cls_or_instance):
         cls = cls_or_instance
         if not isclass(cls_or_instance):
@@ -56,151 +38,3 @@ def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
         return built
 
     raise ValueError(f"{cls_or_instance!r} is not a dataclass or yggdrasil dataclass")
-
-
-def yggdataclass(
-    cls=None, /,
-    *,
-    init=True,
-    repr=True,
-    eq=True,
-    order=False,
-    unsafe_hash=False, frozen=False, match_args=True,
-    kw_only=False, slots=False,
-    weakref_slot=False
-):
-    """Decorate a class with dataclass behavior plus Arrow helpers.
-
-    Examines PEP 526 __annotations__ to determine fields.
-
-    If init is true, an __init__() method is added to the class. If repr
-    is true, a __repr__() method is added. If order is true, rich
-    comparison dunder methods are added. If unsafe_hash is true, a
-    __hash__() method is added. If frozen is true, fields may not be
-    assigned to after instance creation. If match_args is true, the
-    __match_args__ tuple is added. If kw_only is true, then by default
-    all fields are keyword-only. If slots is true, a new class with a
-    __slots__ attribute is returned.
-    """
-
-    def wrap(c):
-        """Wrap a class with yggdrasil dataclass enhancements.
-
-        Args:
-            c: Class to decorate.
-
-        Returns:
-            Decorated dataclass type.
-        """
-
-        def _init_public_fields(cls):
-            """Return init-enabled, public dataclass fields.
-
-            Args:
-                cls: Dataclass type.
-
-            Returns:
-                List of dataclasses.Field objects.
-            """
-            return [
-                field
-                for field in dataclasses.fields(cls)
-                if field.init and not field.name.startswith("_")
-            ]
-
-        if not hasattr(c, "default_instance"):
-            @classmethod
-            def default_instance(cls):
-                """Return a default instance built from type defaults.
-
-                Returns:
-                    Default instance of the dataclass.
-                """
-                from yggdrasil.types import default_scalar
-
-                if not hasattr(cls, "__default_instance__"):
-                    cls.__default_instance__ = default_scalar(cls)
-
-                return dataclasses.replace(cls.__default_instance__)
-
-            c.default_instance = default_instance
-
-        if not hasattr(c, "__safe_init__"):
-            @classmethod
-            def __safe_init__(cls, *args, **kwargs):
-                """Safely initialize a dataclass using type conversion and defaults."""
-
-                fields = _init_public_fields(cls)
-                field_names = [field.name for field in fields]
-
-                if len(args) > len(field_names):
-                    raise TypeError(
-                        f"Expected at most {len(field_names)} positional arguments, got {len(args)}"
-                    )
-
-                provided = {name: value for name, value in zip(field_names, args)}
-
-                for key, value in kwargs.items():
-                    if key in provided:
-                        raise TypeError(f"Got multiple values for argument '{key}'")
-                    if key not in field_names:
-                        raise TypeError(
-                            f"{key!r} is an invalid field for {cls.__name__}"
-                        )
-
-                    provided[key] = value
-
-                from yggdrasil.types.cast import convert
-
-                defaults = cls.default_instance()
-                init_kwargs = {}
-
-                for field in fields:
-                    if field.name in provided:
-                        init_kwargs[field.name] = convert(provided[field.name], field.type)
-                    else:
-                        init_kwargs[field.name] = getattr(defaults, field.name, None)
-
-                return cls(**init_kwargs)
-
-            c.__safe_init__ = __safe_init__
-
-        if not hasattr(c, "__arrow_field__"):
-            @classmethod
-            def __arrow_field__(cls, name: str | None = None):
-                """Return an Arrow field representing the dataclass schema.
-
-                Args:
-                    name: Optional override for the field name.
-
-                Returns:
-                    Arrow field describing the dataclass schema.
-                """
-                from yggdrasil.types.python_arrow import arrow_field_from_hint
-
-                return arrow_field_from_hint(cls, name=name)
-
-            c.__arrow_field__ = __arrow_field__
-
-        base = dataclasses.dataclass(
-            c,
-            init=init,
-            repr=repr,
-            eq=eq,
-            order=order,
-            unsafe_hash=unsafe_hash,
-            frozen=frozen,
-            match_args=match_args,
-            kw_only=kw_only,
-            slots=slots,
-        )
-
-        return base
-
-    # See if we're being called as @dataclass or @dataclass().
-    if cls is None:
-        # We're called with parens.
-        return wrap
-
-    # We're called as @dataclass without parens.
-    return wrap(cls)
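With `yggdataclass` and `is_yggdataclass` gone, plain standard-library dataclasses are the input to `get_dataclass_arrow_field`. A sketch; the exact Arrow types come from yggdrasil's hint mapping, with `int64`/`string` being the expected outcome for these hints:

```python
import dataclasses

from yggdrasil.dataclasses import get_dataclass_arrow_field

@dataclasses.dataclass
class User:
    id: int
    email: str

field = get_dataclass_arrow_field(User)  # cached in DATACLASS_ARROW_FIELD_CACHE
print(field)  # expected shape: struct<id: int64, email: string>
```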
yggdrasil/libs/__init__.py CHANGED
@@ -1,6 +1,3 @@
 """Helper utilities for optional dependency integrations."""
 
-from .sparklib import *
-from .polarslib import *
-from .pandaslib import *
 from .extensions import *
yggdrasil/pyutils/python_env.py CHANGED
@@ -16,7 +16,7 @@ import sys
 import tempfile
 import threading
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple
 
@@ -415,11 +415,13 @@ def _locked_env(root: Path):
 # PythonEnv
 # -----------------------
 
-@dataclass(frozen=True)
+@dataclass
 class PythonEnv:
     """Represent a managed Python environment rooted at a filesystem path."""
     root: Path
 
+    _version: Optional[str] = field(default=None, repr=False)
+
     def __post_init__(self) -> None:
         """Normalize the root path after dataclass initialization.
 
@@ -862,8 +864,9 @@ class PythonEnv:
        Returns:
            Version string.
        """
-       out = self.exec_code("import sys; print(sys.version.split()[0])", check=True)
-       return out.strip()
+       if self._version is None:
+           self._version = self.exec_code("import sys; print(sys.version.split()[0])", check=True).strip()
+       return self._version
 
    @property
    def version_info(self) -> tuple[int, int, int]:
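The `version` hunk memoizes the interpreter probe in the new `_version` field, so only the first access pays the subprocess cost; this is also why `frozen=True` is dropped in the same change, since the cache field must be assignable. The same pattern in isolation (a standalone sketch, not yggdrasil API):

```python
import subprocess
import sys
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Probe:
    _version: Optional[str] = field(default=None, repr=False)

    @property
    def version(self) -> str:
        if self._version is None:  # first access spawns the interpreter once
            out = subprocess.run(
                [sys.executable, "-c", "import sys; print(sys.version.split()[0])"],
                capture_output=True, text=True, check=True,
            ).stdout
            self._version = out.strip()
        return self._version  # later accesses hit the cache

print(Probe().version)
```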
yggdrasil/requests/__init__.py CHANGED
@@ -1,5 +1,4 @@
 """Convenience imports for request session helpers."""
 
-from .msal import MSALSession, MSALAuth
-
-__all__ = ["MSALSession", "MSALAuth"]
+from .msal import *
+from .session import *
yggdrasil/types/cast/cast_options.py CHANGED
@@ -7,16 +7,15 @@ import pyarrow as pa
 
 from .registry import convert
 from ..python_arrow import is_arrow_type_list_like
-from ...dataclasses import yggdataclass
+from ...libs.polarslib import polars
+from ...libs.sparklib import pyspark
 
 __all__ = [
     "CastOptions",
 ]
 
-from ...libs import pyspark, polars
 
-
-@yggdataclass
+@dataclasses.dataclass
 class CastOptions:
     """
     Options controlling Arrow casting behavior.
yggdrasil/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.47"
+__version__ = "0.1.49"
yggdrasil/types/libs.py DELETED
@@ -1,12 +0,0 @@
-"""Re-export optional dependency helpers for types modules."""
-
-from ..libs import pandas, polars, pyspark, require_pandas, require_polars, require_pyspark
-
-__all__ = [
-    "pandas",
-    "polars",
-    "pyspark",
-    "require_pandas",
-    "require_polars",
-    "require_pyspark",
-]