ygg 0.1.48-py3-none-any.whl → 0.1.49-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ygg-0.1.49.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.48
+ Version: 0.1.49
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
  Requires-Dist: black; extra == "dev"
  Requires-Dist: ruff; extra == "dev"
  Requires-Dist: mypy; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
  Dynamic: license-file

  # Yggdrasil (Python)
ygg-0.1.49.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
- ygg-0.1.48.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ ygg-0.1.49.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
  yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
- yggdrasil/version.py,sha256=GL56LdSW6fsXlq5LHiGjsIVgyhxVQeeDdO3Sd6nzZYc,22
+ yggdrasil/version.py,sha256=pnii9XXudF0U50FobVvNgNzGy9lA9q_DntGQAvyqaFA,22
  yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
  yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
- yggdrasil/databricks/compute/cluster.py,sha256=0QjYHlaXSMgYqzMRy1Jypm2j7xoGRkPdwURZsQn_73U,43228
- yggdrasil/databricks/compute/execution_context.py,sha256=anOxfNms83dZ5FTknbfT8uj889LjheMqEx9W5NtJC9E,23094
- yggdrasil/databricks/compute/remote.py,sha256=nEN_Fr1Ouul_iKOf4B5QjEGscYAcl7nHjGsl2toRzrU,2874
+ yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
+ yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
+ yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
  yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
  yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
  yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
@@ -16,10 +16,10 @@ yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCr
  yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
  yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
  yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
- yggdrasil/databricks/workspaces/io.py,sha256=Tdde4LaGNJNT50R11OkEYZyNacyIW9QrOXMAicAlIr4,32208
- yggdrasil/databricks/workspaces/path.py,sha256=-XnCD9p42who3DAwnITVE1KyrZUSoXDKHA8iZi-7wk4,47743
+ yggdrasil/databricks/workspaces/io.py,sha256=CDq9NsYFjlSJ1QbKFlfWvZLQPVoWyZ4b3XR_lxNPcZE,32776
+ yggdrasil/databricks/workspaces/path.py,sha256=BxDwxE7q1-NLKEZQT4xLM3LeCeQKO3wUy7R-Ce-cSMk,47875
  yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
- yggdrasil/databricks/workspaces/workspace.py,sha256=c6CBBun2BskEnsP74pbLVOe_TKXZs4L4r4gPQtIzlQE,23821
+ yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
  yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
  yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
  yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
@@ -37,7 +37,7 @@ yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWg
  yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
  yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
  yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
- yggdrasil/pyutils/python_env.py,sha256=tuglnjdqHQjNh18qDladVoSEOjCD0RcnMEPYJ0tArOs,50985
+ yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
  yggdrasil/pyutils/retry.py,sha256=n5sr-Zu7fYrdLbjJ4WifK2lk0gEGmHv5FYt2HaCm1Qc,11916
  yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
  yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
@@ -55,8 +55,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
  yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
  yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
- ygg-0.1.48.dist-info/METADATA,sha256=gpScM9WWu0y7C5ebXB6gsJBe9VbehZEU__E7HfWp8hk,18452
- ygg-0.1.48.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ygg-0.1.48.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
- ygg-0.1.48.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
- ygg-0.1.48.dist-info/RECORD,,
+ ygg-0.1.49.dist-info/METADATA,sha256=CHTqeVyiYa1868ZDwISDHKyXYxPeUH0mHhvHLYYoDbg,18528
+ ygg-0.1.49.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ygg-0.1.49.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ ygg-0.1.49.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ ygg-0.1.49.dist-info/RECORD,,
yggdrasil/databricks/compute/cluster.py CHANGED
@@ -144,6 +144,7 @@ class Cluster(WorkspaceService):
  single_user_name: Optional[str] = None,
  runtime_engine: Optional["RuntimeEngine"] = None,
  libraries: Optional[list[str]] = None,
+ update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **kwargs
  ) -> "Cluster":
  """Create or reuse a cluster that mirrors the current Python environment.
@@ -152,9 +153,10 @@ class Cluster(WorkspaceService):
  workspace: Workspace to use for the cluster.
  cluster_id: Optional cluster id to reuse.
  cluster_name: Optional cluster name to reuse.
- single_user_name: Optional user name for single-user clusters.
+ single_user_name: Optional username for single-user clusters.
  runtime_engine: Optional Databricks runtime engine.
  libraries: Optional list of libraries to install.
+ update_timeout: wait timeout, if None it will not wait completion
  **kwargs: Additional cluster specification overrides.

  Returns:
@@ -176,6 +178,7 @@ class Cluster(WorkspaceService):
  single_user_name=single_user_name,
  runtime_engine=runtime_engine,
  libraries=libraries,
+ update_timeout=update_timeout,
  **kwargs
  )
  )
@@ -190,6 +193,7 @@ class Cluster(WorkspaceService):
  single_user_name: Optional[str] = "current",
  runtime_engine: Optional["RuntimeEngine"] = None,
  libraries: Optional[list[str]] = None,
+ update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **kwargs
  ) -> "Cluster":
  """Create/update a cluster to match the local Python environment.
@@ -198,9 +202,10 @@ class Cluster(WorkspaceService):
  source: Optional PythonEnv to mirror (defaults to current).
  cluster_id: Optional cluster id to update.
  cluster_name: Optional cluster name to update.
- single_user_name: Optional single user name for the cluster.
+ single_user_name: Optional single username for the cluster.
  runtime_engine: Optional runtime engine selection.
  libraries: Optional list of libraries to install.
+ update_timeout: wait timeout, if None it will not wait completion
  **kwargs: Additional cluster specification overrides.

  Returns:
@@ -242,6 +247,7 @@ class Cluster(WorkspaceService):
  single_user_name=single_user_name,
  runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
  libraries=libraries,
+ update_timeout=update_timeout,
  **kwargs
  )

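Taken together, the hunks above thread the new `update_timeout` knob from the public entry point down through the create/update path. A minimal usage sketch; the import path is taken from the RECORD listing, and the profile and cluster name are hypothetical:

    import datetime as dt

    from yggdrasil.databricks.workspaces.workspace import Workspace

    workspace = Workspace(profile="my-profile")
    cluster = workspace.clusters().replicated_current_environment(
        workspace=workspace,
        cluster_name="dev-cluster",
        update_timeout=dt.timedelta(minutes=5),  # pass None to return without waiting
    )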
@@ -380,7 +386,9 @@ class Cluster(WorkspaceService):
  start = time.time()
  sleep_time = tick

- if isinstance(timeout, dt.timedelta):
+ if not timeout:
+ timeout = 20 * 60.0
+ elif isinstance(timeout, dt.timedelta):
  timeout = timeout.total_seconds()

  while self.is_pending:
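With this change, a falsy `timeout` (`None` or `0`) now falls back to a 20-minute default before the `timedelta` branch runs. A standalone sketch of the same normalization, for illustration only:

    import datetime as dt

    def normalize_timeout(timeout):
        # Mirrors the wait loop above: falsy -> 20 minutes, timedelta -> seconds.
        if not timeout:
            return 20 * 60.0
        if isinstance(timeout, dt.timedelta):
            return timeout.total_seconds()
        return float(timeout)

    assert normalize_timeout(None) == 1200.0
    assert normalize_timeout(dt.timedelta(minutes=5)) == 300.0
    assert normalize_timeout(90) == 90.0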
@@ -412,12 +420,14 @@ class Cluster(WorkspaceService):
  # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
  v = self.spark_version

- if v is None:
+ if not v:
  return None

  parts = v.split(".")
+
  if len(parts) < 2:
  return None
+
  return ".".join(parts[:2]) # e.g. "17.3"

  @property
@@ -428,8 +438,10 @@ class Cluster(WorkspaceService):
  When the runtime can't be mapped, returns ``None``.
  """
  v = self.runtime_version
- if v is None:
+
+ if not v:
  return None
+
  return _PYTHON_BY_DBR.get(v)

  # ------------------------------------------------------------------ #
@@ -586,6 +598,7 @@ class Cluster(WorkspaceService):
  cluster_id: Optional[str] = None,
  cluster_name: Optional[str] = None,
  libraries: Optional[List[Union[str, "Library"]]] = None,
+ update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **cluster_spec: Any
  ):
  """Create a new cluster or update an existing one.
@@ -594,6 +607,7 @@ class Cluster(WorkspaceService):
  cluster_id: Optional cluster id to update.
  cluster_name: Optional cluster name to update or create.
  libraries: Optional libraries to install.
+ update_timeout: wait timeout, if None it will not wait completion
  **cluster_spec: Cluster specification overrides.

  Returns:
@@ -609,24 +623,28 @@ class Cluster(WorkspaceService):
  return found.update(
  cluster_name=cluster_name,
  libraries=libraries,
+ wait_timeout=update_timeout,
  **cluster_spec
  )

  return self.create(
  cluster_name=cluster_name,
  libraries=libraries,
+ wait_timeout=update_timeout,
  **cluster_spec
  )

  def create(
  self,
  libraries: Optional[List[Union[str, "Library"]]] = None,
+ wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
  **cluster_spec: Any
  ) -> str:
  """Create a new cluster and optionally install libraries.

  Args:
  libraries: Optional list of libraries to install after creation.
+ wait_timeout: wait timeout, if None it will not wait completion
  **cluster_spec: Cluster specification overrides.

  Returns:
@@ -646,14 +664,17 @@ class Cluster(WorkspaceService):
  update_details,
  )

- self.details = self.clusters_client().create_and_wait(**update_details)
+ self.details = self.clusters_client().create(**update_details)

  LOGGER.info(
  "Created %s",
  self
  )

- self.install_libraries(libraries=libraries, raise_error=False)
+ self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+ if wait_timeout:
+ self.wait_for_status(timeout=wait_timeout)

  return self

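Note the behavioral shift here: `create` now uses the SDK's non-blocking `create` call instead of `create_and_wait`, kicks off library installation without waiting, and only blocks at the end when a `wait_timeout` is given. A hedged usage sketch, assuming an existing `Cluster` instance named `cluster` and a hypothetical library spec:

    import datetime as dt

    # Fire-and-forget: returns as soon as the create call is accepted.
    cluster.create(cluster_name="dev-cluster", libraries=["pandas"], wait_timeout=None)

    # Blocking: waits up to 10 minutes for the cluster to settle.
    cluster.create(cluster_name="dev-cluster", libraries=["pandas"], wait_timeout=dt.timedelta(minutes=10))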
@@ -661,7 +682,7 @@ class Cluster(WorkspaceService):
  self,
  libraries: Optional[List[Union[str, "Library"]]] = None,
  access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
- wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
+ wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **cluster_spec: Any
  ) -> "Cluster":
  """Update cluster configuration and optionally install libraries.
@@ -708,7 +729,7 @@ class Cluster(WorkspaceService):
  self, diff
  )

- self.wait_for_status()
+ self.wait_for_status(timeout=wait_timeout)
  self.clusters_client().edit(**update_details)
  self.update_permissions(access_control_list=access_control_list)

@@ -727,7 +748,7 @@ class Cluster(WorkspaceService):
  access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
  ):
  if not access_control_list:
- access_control_list = self.default_permissions()
+ return self

  access_control_list = self._check_permission(access_control_list)

@@ -745,6 +766,7 @@ class Cluster(WorkspaceService):
  permission_level=ClusterPermissionLevel.CAN_MANAGE
  )
  for name in current_groups
+ if name not in {"users"}
  ]

  def _check_permission(
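The added guard keeps the workspace-wide `users` group out of the default CAN_MANAGE grants. A standalone illustration of the filter, with hypothetical group names:

    current_groups = ["admins", "users", "data-eng"]

    managed = [name for name in current_groups if name not in {"users"}]
    assert managed == ["admins", "data-eng"]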
@@ -862,18 +884,22 @@ class Cluster(WorkspaceService):
  Returns:
  The current Cluster instance.
  """
+ if self.is_running:
+ return self
+
  self.wait_for_status()

- if not self.is_running:
- LOGGER.debug("Starting %s", self)
+ if self.is_running:
+ return self

- if wait_timeout:
- self.clusters_client().start(cluster_id=self.cluster_id)
- self.wait_for_status(timeout=wait_timeout.total_seconds())
- else:
- self.clusters_client().start(cluster_id=self.cluster_id)
+ LOGGER.debug("Starting %s", self)
+
+ self.clusters_client().start(cluster_id=self.cluster_id)

- LOGGER.info("Started %s", self)
+ LOGGER.info("Started %s", self)
+
+ if wait_timeout:
+ self.wait_for_status(timeout=wait_timeout.total_seconds())

  return self

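The start path now short-circuits twice when the cluster is already running, issues the start call unconditionally otherwise, and only waits when `wait_timeout` is set; this appears to be what lets callers request a non-blocking start, as remote.py does further down. Sketch, assuming an existing `Cluster` named `cluster`:

    import datetime as dt

    cluster.ensure_running(wait_timeout=dt.timedelta(minutes=15))  # block until usable
    cluster.ensure_running(wait_timeout=None)                      # fire-and-forget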
@@ -889,7 +915,7 @@ class Cluster(WorkspaceService):

  if self.is_running:
  self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
- return self.wait_for_status()
+ return self

  return self.start()

yggdrasil/databricks/compute/execution_context.py CHANGED
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
  """
  return self.cluster.workspace.sdk()

- def _create_command(
+ def create_command(
  self,
  language: "Language",
  ) -> any:
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
  Returns:
  The created command execution context response.
  """
- self.cluster.ensure_running()
-
  LOGGER.debug(
  "Creating Databricks command execution context for %s",
  self.cluster
  )

- created = self._workspace_client().command_execution.create_and_wait(
- cluster_id=self.cluster.cluster_id,
- language=language,
+ try:
+ created = self._workspace_client().command_execution.create_and_wait(
+ cluster_id=self.cluster.cluster_id,
+ language=language,
+ )
+ except:
+ self.cluster.ensure_running()
+
+ created = self._workspace_client().command_execution.create_and_wait(
+ cluster_id=self.cluster.cluster_id,
+ language=language,
+ )
+
+ LOGGER.info(
+ "Created Databricks command execution context %s",
+ self
  )
+
  created = getattr(created, "response", created)

  return created
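Context creation is now optimistic: the first `create_and_wait` runs without checking the cluster, and only on failure does the code call `ensure_running()` and retry once. One caveat worth noting is that the bare `except:` in the released code also swallows `SystemExit` and `KeyboardInterrupt`. A standalone sketch of the same pattern with the narrower idiom:

    def create_with_retry(create, ensure_running):
        # Optimistic first attempt; on any error, bring the cluster up and retry once.
        try:
            return create()
        except Exception:  # the released code uses a bare `except:`
            ensure_running()
            return create()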
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
  The connected ExecutionContext instance.
  """
  if self.context_id is not None:
- LOGGER.debug(
- "Execution context already open for %s",
- self
- )
  return self

  self.language = language or self.language
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
  if self.language is None:
  self.language = Language.PYTHON

- ctx = self._create_command(language=self.language)
+ ctx = self.create_command(language=self.language)

  context_id = ctx.id
  if not context_id:
yggdrasil/databricks/compute/remote.py CHANGED
@@ -39,6 +39,7 @@ def databricks_remote_compute(
  timeout: Optional[dt.timedelta] = None,
  env_keys: Optional[List[str]] = None,
  force_local: bool = False,
+ update_timeout: Optional[Union[float, dt.timedelta]] = None,
  **options
  ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
  """Return a decorator that executes functions on a remote cluster.
@@ -52,6 +53,7 @@ def databricks_remote_compute(
  timeout: Optional execution timeout for remote calls.
  env_keys: Optional environment variable names to forward.
  force_local: Force local execution
+ update_timeout: creation or update wait timeout
  **options: Extra options forwarded to the execution decorator.

  Returns:
@@ -82,7 +84,8 @@ def databricks_remote_compute(
  cluster = workspace.clusters().replicated_current_environment(
  workspace=workspace,
  cluster_name=cluster_name,
- single_user_name=workspace.current_user.user_name
+ single_user_name=workspace.current_user.user_name,
+ update_timeout=update_timeout
  )

  cluster.ensure_running(wait_timeout=None)
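End to end, the decorator now forwards `update_timeout` into cluster replication and then starts the cluster without blocking. A hedged usage sketch; the import path comes from the RECORD listing, `cluster_name` is assumed to be one of the decorator's parameters (as its body suggests), and the names are hypothetical:

    import datetime as dt

    from yggdrasil.databricks.compute.remote import databricks_remote_compute

    @databricks_remote_compute(
        cluster_name="dev-cluster",
        update_timeout=dt.timedelta(minutes=5),
    )
    def heavy_transform(n: int) -> int:
        return n * 2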
yggdrasil/databricks/workspaces/io.py CHANGED
@@ -975,28 +975,40 @@ class DatabricksVolumeIO(DatabricksIO):
  """Read bytes from a volume file.

  Args:
- start: Starting byte offset.
+ start: Starting byte offset (0-based).
  length: Number of bytes to read.
  allow_not_found: Whether to suppress missing-path errors.

  Returns:
  Bytes read from the file.
  """
- if length == 0:
+ if length <= 0:
  return b""
+ if start < 0:
+ raise ValueError(f"start must be >= 0, got {start}")
+ if length < 0:
+ raise ValueError(f"length must be >= 0, got {length}")

  sdk = self.workspace.sdk()
  client = sdk.files
  full_path = self.path.files_full_path()

- resp = client.download(full_path)
- result = (
- resp.contents
- .seek(start, io.SEEK_SET)
- .read(length)
- )
+ try:
+ resp = client.download(full_path)
+ except Exception as e:
+ # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
+ if allow_not_found and any(s in str(e).lower() for s in ("not found", "does not exist", "404")):
+ return b""
+ raise
+
+ data = resp.contents.read()

- return result
+ # If start is past EOF, return empty (common file-like behavior).
+ if start >= len(data):
+ return b""
+
+ end = start + length
+ return data[start:end]

  def write_all_bytes(self, data: bytes):
  """Write bytes to a volume file.
yggdrasil/databricks/workspaces/path.py CHANGED
@@ -494,11 +494,15 @@ class DatabricksPath:

  try:
  info = sdk.files.get_directory_metadata(full_path)
- mtime = (
- dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
- if info.last_modified
- else None
- )
+
+ if info is None:
+ mtime = dt.datetime.now(tz=dt.timezone.utc)
+ else:
+ mtime = (
+ dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
+ if info.last_modified
+ else None
+ )

  return self.reset_metadata(is_file=False, is_dir=True, size=info, mtime=mtime)
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
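The `last_modified` value is an RFC 1123 HTTP date, which is why the `%a, %d %b %Y %H:%M:%S %Z` format is used; the new branch substitutes the current time when no metadata object comes back at all. A quick standalone check of the parse:

    import datetime as dt

    stamp = "Wed, 01 Jan 2025 12:00:00 GMT"
    mtime = dt.datetime.strptime(stamp, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
    assert mtime == dt.datetime(2025, 1, 1, 12, 0, 0, tzinfo=dt.timezone.utc)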
yggdrasil/databricks/workspaces/workspace.py CHANGED
@@ -8,7 +8,6 @@ from abc import ABC
  from dataclasses import dataclass
  from pathlib import Path
  from typing import (
- Any,
  BinaryIO,
  Iterator,
  Optional,
@@ -55,7 +54,9 @@ def _get_env_product_version():
  v = os.getenv("DATABRICKS_PRODUCT_VERSION")

  if not v:
- return YGGDRASIL_VERSION
+ if _get_env_product() == "yggdrasil":
+ return YGGDRASIL_VERSION
+ return None
  return v.strip().lower()

@@ -106,11 +107,12 @@ class Workspace:
  product: Optional[str] = dataclasses.field(default_factory=_get_env_product, repr=False)
  product_version: Optional[str] = dataclasses.field(default_factory=_get_env_product_version, repr=False)
  product_tag: Optional[str] = dataclasses.field(default_factory=_get_env_product_tag, repr=False)
+ custom_tags: Optional[dict] = dataclasses.field(default=None, repr=False)

  # Runtime cache (never serialized)
- _sdk: Any = dataclasses.field(init=False, default=None, repr=False, compare=False, hash=False)
- _was_connected: bool = dataclasses.field(init=False, default=False, repr=False, compare=False)
- _cached_token: Optional[str] = dataclasses.field(init=False, default=None, repr=False, compare=False)
+ _sdk: Optional["WorkspaceClient"] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+ _was_connected: bool = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+ _cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)

  # -------------------------
  # Pickle support
@@ -175,19 +177,43 @@ class Workspace:
  # -------------------------
  def clone_instance(
  self,
- **kwargs
  ) -> "Workspace":
  """Clone the workspace config with overrides.

- Args:
- **kwargs: Field overrides for the clone.
-
  Returns:
  A new Workspace instance with updated fields.
  """
- state = self.__getstate__()
- state.update(kwargs)
- return Workspace().__setstate__(state)
+ return Workspace(
+ host = self.host,
+ account_id = self.account_id,
+ token = self.token,
+ client_id = self.client_id,
+ client_secret = self.client_secret,
+ token_audience = self.token_audience,
+ azure_workspace_resource_id = self.azure_workspace_resource_id,
+ azure_use_msi = self.azure_use_msi,
+ azure_client_secret = self.azure_client_secret,
+ azure_client_id = self.azure_client_id,
+ azure_tenant_id = self.azure_tenant_id,
+ azure_environment = self.azure_environment,
+ google_credentials = self.google_credentials,
+ google_service_account = self.google_service_account,
+ profile = self.profile,
+ config_file = self.config_file,
+ auth_type = self.auth_type,
+ http_timeout_seconds = self.http_timeout_seconds,
+ retry_timeout_seconds = self.retry_timeout_seconds,
+ debug_truncate_bytes = self.debug_truncate_bytes,
+ debug_headers = self.debug_headers,
+ rate_limit = self.rate_limit,
+ product = self.product,
+ product_version = self.product_version,
+ product_tag = self.product_tag,
+ custom_tags = self.custom_tags,
+ _sdk = self._sdk,
+ _was_connected = self._was_connected,
+ _cached_token = self._cached_token,
+ )

  # -------------------------
  # SDK connection
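The rewrite trades the old `**kwargs` override mechanism for an explicit field-by-field copy, so callers can no longer pass overrides at clone time (the docstring's "with overrides" is now historical). For a plain dataclass, `dataclasses.replace` would be the idiomatic shorthand for the same copy; a sketch only, assuming every field above is an init field, as the new definitions suggest:

    import dataclasses

    clone = dataclasses.replace(workspace)                    # straight copy
    other = dataclasses.replace(workspace, profile="other")   # copy with an override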
@@ -300,8 +326,9 @@ class Workspace:
  Drop the cached WorkspaceClient (no actual close needed, but this
  avoids reusing stale config).
  """
- self._sdk = None
- self._was_connected = False
+ if self._sdk is not None:
+ self._sdk = None
+ self._was_connected = False

  # ------------------------------------------------------------------ #
  # Properties
@@ -561,28 +588,19 @@ class Workspace:
  Returns:
  A dict of default tags.
  """
- return {
+ base = {
  k: v
  for k, v in (
  ("Product", self.product),
- ("ProductVersion", self.product_version),
  ("ProductTag", self.product_tag),
- ("ProductUser", self.current_user.user_name)
  )
  if v
  }

- def merge_tags(self, existing: dict | None = None):
- """Merge default tags with an existing set.
-
- Args:
- existing: Optional existing tags.
+ if self.custom_tags:
+ base.update(self.custom_tags)

- Returns:
- A dict of merged tags.
- """
- if existing:
- return self.default_tags()
+ return base

  def sql(
  self,
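With `custom_tags` merged last, user-supplied tags win over the defaults, and `ProductVersion`/`ProductUser` are no longer emitted (dropping `ProductUser` also removes the `current_user` lookup from this path). A hedged usage sketch with hypothetical values:

    workspace = Workspace(
        profile="my-profile",
        custom_tags={"CostCenter": "1234"},
    )

    # Yields e.g. {"Product": ..., "ProductTag": ..., "CostCenter": "1234"},
    # with falsy defaults filtered out.
    tags = workspace.default_tags()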
yggdrasil/pyutils/python_env.py CHANGED
@@ -16,7 +16,7 @@ import sys
  import tempfile
  import threading
  from contextlib import contextmanager
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple

@@ -415,11 +415,13 @@ def _locked_env(root: Path):
  # PythonEnv
  # -----------------------

- @dataclass(frozen=True)
+ @dataclass
  class PythonEnv:
  """Represent a managed Python environment rooted at a filesystem path."""
  root: Path

+ _version: Optional[str] = field(default=None, repr=False)
+
  def __post_init__(self) -> None:
  """Normalize the root path after dataclass initialization.

@@ -862,8 +864,9 @@ class PythonEnv:
  Returns:
  Version string.
  """
- out = self.exec_code("import sys; print(sys.version.split()[0])", check=True)
- return out.strip()
+ if self._version is None:
+ self._version = self.exec_code("import sys; print(sys.version.split()[0])", check=True).strip()
+ return self._version

  @property
  def version_info(self) -> tuple[int, int, int]:
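Caching the interpreter version on the instance means the environment's Python is shelled out to at most once per `PythonEnv`, and it is presumably why `frozen=True` had to go, since the method now assigns to `self._version`. Sketch, assuming `version` is the property wrapping the method above and a hypothetical root path:

    from pathlib import Path

    env = PythonEnv(root=Path("/opt/venvs/app"))

    first = env.version   # runs the interpreter once and caches the result
    second = env.version  # served from the cached _version field
    assert first == second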
yggdrasil/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.48"
+ __version__ = "0.1.49"