ygg 0.1.43__py3-none-any.whl → 0.1.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ygg-0.1.43.dist-info/METADATA → ygg-0.1.45.dist-info/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.43
+Version: 0.1.45
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
```
ygg-0.1.43.dist-info/RECORD → ygg-0.1.45.dist-info/RECORD

```diff
@@ -1,24 +1,25 @@
-ygg-0.1.43.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ygg-0.1.45.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 yggdrasil/__init__.py,sha256=PfH7Xwt6uue6oqe6S5V8NhDJcVQClkKrBE1KXhdelZc,117
-yggdrasil/version.py,sha256=UoVHMD6MTDcIuCiAGXBFICsOFwtnni_KEOLxLK-bMlk,22
+yggdrasil/version.py,sha256=zC5ozeVSiy36qEh2Scy5PuAbmhSHW3T5nZLXsLr6SNI,22
 yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
 yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
-yggdrasil/databricks/compute/cluster.py,sha256=RLXEKgGAeIOPH6lzX3J8brBoqDu_RsjiZam-yymiabM,40933
-yggdrasil/databricks/compute/execution_context.py,sha256=E86b-VF9hZ4WbqHt43ATR-YYvgPZ91_17DQ5oaQFNgg,22688
+yggdrasil/databricks/compute/cluster.py,sha256=HI9811oBCpWeo4V921FVAlRUXKXM4XO7HS9DQVOuzpM,41340
+yggdrasil/databricks/compute/execution_context.py,sha256=_2xSEskWR0ODD0bxCxcuM44mS1sbutUh0DF7-PUYNsw,22158
 yggdrasil/databricks/compute/remote.py,sha256=nEN_Fr1Ouul_iKOf4B5QjEGscYAcl7nHjGsl2toRzrU,2874
 yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
 yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
 yggdrasil/databricks/sql/__init__.py,sha256=y1n5yg-drZ8QVZbEgznsRG24kdJSnFis9l2YfYCsaCM,234
-yggdrasil/databricks/sql/engine.py,sha256=Azx3gKtWOMy3D9I2FhkLmpthZPWAJZ9iZkaDivmt_0s,41002
+yggdrasil/databricks/sql/engine.py,sha256=K5WmGKpXU78JA3UdK8dLxBD_GXKidZJFe7hytuC5UHg,41029
 yggdrasil/databricks/sql/exceptions.py,sha256=Jqd_gT_VyPL8klJEHYEzpv5eHtmdY43WiQ7HZBaEqSk,53
 yggdrasil/databricks/sql/statement_result.py,sha256=_mBolHae0AASfe1Tlld1KTXs-K4-oy9dniHDyR2ILYc,16736
 yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
+yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
 yggdrasil/databricks/workspaces/__init__.py,sha256=Ti1I99JTC3koYJaCy8WYvkAox4KdcuMRk8b2rHroWCY,133
 yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
 yggdrasil/databricks/workspaces/io.py,sha256=Tdde4LaGNJNT50R11OkEYZyNacyIW9QrOXMAicAlIr4,32208
 yggdrasil/databricks/workspaces/path.py,sha256=-XnCD9p42who3DAwnITVE1KyrZUSoXDKHA8iZi-7wk4,47743
 yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
-yggdrasil/databricks/workspaces/workspace.py,sha256=MW-BEyldROqbX9SBbDspvlys_zehJjK5YgM3sGLfW-g,23382
+yggdrasil/databricks/workspaces/workspace.py,sha256=xxHAmFvZQ0FDWPe8pUGW-oVX9MfYmkKyqt0VQ312vbk,23915
 yggdrasil/dataclasses/__init__.py,sha256=6SdfIyTsoM4AuVw5TW4Q-UWXz41EyfsMcpD30cmjbSM,125
 yggdrasil/dataclasses/dataclass.py,sha256=fKokFUnqe4CmXXGMTdF4XDWbCUl_c_-se-UD48L5s1E,6594
 yggdrasil/libs/__init__.py,sha256=ulzk-ZkFUI2Pfo93YKtO8MBsEWtRZzLos7HAxN74R0w,168
@@ -30,10 +31,10 @@ yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFu
 yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397
 yggdrasil/libs/extensions/spark_extensions.py,sha256=E64n-3SFTDgMuXwWitX6vOYP9ln2lpGKb0htoBLEZgc,16745
 yggdrasil/pyutils/__init__.py,sha256=tl-LapAc71TV7RMgf2ftKwrzr8iiLOGHeJgA3RvO93w,293
-yggdrasil/pyutils/callable_serde.py,sha256=FtbY2PUBHwDe2IyX28gLJwQYjtrvhvqtTSIIBc5x-dk,23538
+yggdrasil/pyutils/callable_serde.py,sha256=_mXfVFVG8zxTM7ha3DkfOCJLEJFXuu7CrMlki97oYLc,20657
 yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI,3080
 yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWgng,3388
-yggdrasil/pyutils/expiring_dict.py,sha256=q9gb09-2EUN-jQZumUw5BXOQGYcj1wb85qKtQlciSxg,5825
+yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
 yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
 yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
 yggdrasil/pyutils/python_env.py,sha256=tuglnjdqHQjNh18qDladVoSEOjCD0RcnMEPYJ0tArOs,50985
@@ -55,8 +56,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
 yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
 yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.43.dist-info/METADATA,sha256=UWEaa2zwto1jGhoHMzROMgAsrXlPddS5qQkBHCAZjoI,19204
-ygg-0.1.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ygg-0.1.43.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
-ygg-0.1.43.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
-ygg-0.1.43.dist-info/RECORD,,
+ygg-0.1.45.dist-info/METADATA,sha256=7rdXzbU9WAoLlFGZvlr9gNMMUcvOhzpGMeSRDHqmRlE,19204
+ygg-0.1.45.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ygg-0.1.45.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.45.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.45.dist-info/RECORD,,
```
yggdrasil/databricks/compute/cluster.py

```diff
@@ -22,7 +22,7 @@ from typing import Any, Iterator, Optional, Union, List, Callable, Dict, ClassVa
 
 from .execution_context import ExecutionContext
 from ..workspaces.workspace import WorkspaceService, Workspace
-from ... import retry, CallableSerde
+from ... import CallableSerde
 from ...libs.databrickslib import databricks_sdk
 from ...pyutils.equality import dicts_equal, dict_diff
 from ...pyutils.expiring_dict import ExpiringDict
@@ -47,6 +47,7 @@ else: # pragma: no cover - runtime fallback when SDK is missing
 __all__ = ["Cluster"]
 
 
+LOGGER = logging.getLogger(__name__)
 NAME_ID_CACHE: dict[str, ExpiringDict] = {}
 
 
@@ -72,9 +73,6 @@ def get_cached_cluster_id(
     return existing.get(cluster_name) if existing else None
 
 
-logger = logging.getLogger(__name__)
-
-
 # module-level mapping Databricks Runtime -> (major, minor) Python version
 _PYTHON_BY_DBR: dict[str, tuple[int, int]] = {
     "10.4": (3, 8),
@@ -363,7 +361,8 @@ class Cluster(WorkspaceService):
         tick: float = 0.5,
         timeout: Union[float, dt.timedelta] = 600,
         backoff: int = 2,
-        max_sleep_time: float = 15
+        max_sleep_time: float = 15,
+        wait_libraries: bool = True
     ):
         """Wait for the cluster to exit pending states.
 
@@ -372,6 +371,7 @@
             timeout: Max seconds to wait before timing out.
             backoff: Backoff multiplier for the sleep interval.
             max_sleep_time: Maximum sleep interval in seconds.
+            wait_libraries: Wait libraries to install fully
 
         Returns:
             The current Cluster instance.
@@ -390,7 +390,8 @@
 
             sleep_time = min(max_sleep_time, sleep_time * backoff)
 
-        self.wait_installed_libraries()
+        if wait_libraries:
+            self.wait_installed_libraries()
 
         self.raise_for_status()
 
```
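`wait_for_status` now accepts a `wait_libraries` flag, so callers can wait on the cluster state itself without blocking on library installation. A usage sketch (hedged: `cluster` is assumed to be an already-constructed `Cluster`; how you obtain it is outside this diff):

```python
import datetime as dt

# `cluster` is assumed: an existing Cluster bound to a workspace.
# Wait for the cluster to leave pending states, skipping the library wait.
cluster.wait_for_status(timeout=dt.timedelta(minutes=10), wait_libraries=False)

# The default (wait_libraries=True) keeps the old behavior and also waits
# for installed libraries to settle before returning.
cluster.wait_for_status()
```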
yggdrasil/databricks/compute/cluster.py (continued)

```diff
@@ -638,7 +639,7 @@
             if k in _CREATE_ARG_NAMES
         }
 
-        logger.debug(
+        LOGGER.debug(
             "Creating Databricks cluster %s with %s",
             update_details["cluster_name"],
             update_details,
@@ -646,7 +647,7 @@
 
         self.details = self.clusters_client().create_and_wait(**update_details)
 
-        logger.info(
+        LOGGER.info(
             "Created %s",
             self
         )
@@ -658,12 +659,14 @@
     def update(
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> "Cluster":
         """Update cluster configuration and optionally install libraries.
 
         Args:
             libraries: Optional libraries to install.
+            wait_timeout: waiting timeout until done, if None it does not wait
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -697,26 +700,22 @@
             for k, v in dict_diff(existing_details, update_details, keys=_EDIT_ARG_NAMES).items()
         }
 
-        logger.debug(
+        LOGGER.debug(
             "Updating %s with %s",
             self, diff
         )
 
         self.wait_for_status()
-        try:
-            self.details = self.clusters_client().edit_and_wait(**update_details)
-        except Exception as e:
-            if self.state == State.TERMINATED:
-                self.start()
-                self.details = self.clusters_client().edit_and_wait(**update_details)
-            else:
-                raise e
-
-        logger.info(
+        self.clusters_client().edit(**update_details)
+
+        LOGGER.info(
             "Updated %s",
             self
         )
 
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout)
+
         return self
 
     def list_clusters(self) -> Iterator["Cluster"]:
```
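`update()` no longer retries `edit_and_wait` around terminated clusters; it issues a non-blocking `edit` and then waits via `wait_for_status` only when `wait_timeout` is set. A sketch of the calling pattern (hypothetical values; `num_workers` stands in for any cluster-spec override):

```python
import datetime as dt

# Apply the edit and return immediately, without polling for RUNNING.
cluster.update(num_workers=4, wait_timeout=None)

# Apply the edit, then block up to 5 minutes for the cluster to settle.
cluster.update(libraries=["polars"], wait_timeout=dt.timedelta(minutes=5))
```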
yggdrasil/databricks/compute/cluster.py (continued)

```diff
@@ -813,7 +812,7 @@
         self.wait_for_status()
 
         if not self.is_running:
-            logger.info("Starting %s", self)
+            LOGGER.debug("Starting %s", self)
 
             if wait_timeout:
                 self.clusters_client().start(cluster_id=self.cluster_id)
@@ -821,6 +820,8 @@
             else:
                 self.clusters_client().start(cluster_id=self.cluster_id)
 
+        LOGGER.info("Started %s", self)
+
         return self
 
     def restart(
@@ -834,7 +835,6 @@
         self.wait_for_status()
 
         if self.is_running:
-            logger.info("Restarting %s", self)
             self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
             return self.wait_for_status()
 
@@ -848,8 +848,10 @@
         Returns:
             The SDK delete response.
         """
-        logger.info("Deleting %s", self)
-        return self.clusters_client().delete(cluster_id=self.cluster_id)
+        if self.cluster_id:
+            LOGGER.debug("Deleting %s", self)
+            self.clusters_client().delete(cluster_id=self.cluster_id)
+            LOGGER.info("Deleted %s", self)
 
     def context(
         self,
@@ -1027,6 +1029,17 @@
             for _ in libraries if _
         ]
 
+        if libraries:
+            existing = [
+                _.library for _ in self.installed_library_statuses()
+            ]
+
+            libraries = [
+                _
+                for _ in libraries
+                if _ not in existing
+            ]
+
         if libraries:
             wsdk.libraries.install(
                 cluster_id=self.cluster_id,
```
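The install path now diffs the request against `installed_library_statuses()` so already-present libraries are not re-sent to the SDK. The same idea in a standalone sketch (plain dicts stand in for SDK `Library` objects; only value equality matters here):

```python
# Hypothetical stand-ins for SDK Library specs.
requested = [{"pypi": "polars"}, {"pypi": "pyarrow"}]
installed = [{"pypi": "polars"}]

# Keep only what is not already installed; install() becomes a no-op
# when everything is present.
to_install = [lib for lib in requested if lib not in installed]
assert to_install == [{"pypi": "pyarrow"}]
```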
yggdrasil/databricks/compute/cluster.py (continued)

```diff
@@ -1141,7 +1154,7 @@
         if raise_error:
             raise DatabricksError("Libraries %s in %s failed to install" % (failed, self))
 
-        logger.warning(
+        LOGGER.exception(
             "Libraries %s in %s failed to install",
             failed, self
         )
@@ -1178,7 +1191,7 @@
         Returns:
             The uploaded library argument(s).
         """
-        return self.context().install_temporary_libraries(libraries=libraries)
+        return self.system_context.install_temporary_libraries(libraries=libraries)
 
     def _check_library(
         self,
```
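Note the switch from `logger.warning` to `LOGGER.exception` above: `Logger.exception` logs at ERROR level and attaches the active traceback, so it is intended for use inside an `except` block. A minimal stdlib illustration:

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("demo")

try:
    raise ValueError("install failed")
except ValueError:
    log.warning("message only, no traceback")  # WARNING level
    log.exception("message plus traceback")    # ERROR level + current traceback
```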
yggdrasil/databricks/compute/execution_context.py

```diff
@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Optional, Any, Callable, List, Dict, Union, It
 
 from ...libs.databrickslib import databricks_sdk
 from ...pyutils.exceptions import raise_parsed_traceback
+from ...pyutils.expiring_dict import ExpiringDict
 from ...pyutils.modules import resolve_local_lib_path
 from ...pyutils.callable_serde import CallableSerde
 
@@ -30,7 +31,7 @@ __all__ = [
     "ExecutionContext"
 ]
 
-logger = logging.getLogger(__name__)
+LOGGER = logging.getLogger(__name__)
 
 
 @dc.dataclass
@@ -38,7 +39,6 @@ class RemoteMetadata:
     """Metadata describing the remote cluster execution environment."""
     site_packages_path: Optional[str] = dc.field(default=None)
     os_env: Dict[str, str] = dc.field(default_factory=dict)
-    requirements: Optional[str] = dc.field(default=None)
     version_info: Tuple[int, int, int] = dc.field(default=(0, 0, 0))
 
     def os_env_diff(
@@ -80,6 +80,7 @@ class ExecutionContext:
 
     _was_connected: Optional[bool] = dc.field(default=None, repr=False)
     _remote_metadata: Optional[RemoteMetadata] = dc.field(default=None, repr=False)
+    _uploaded_package_roots: Optional[ExpiringDict] = dc.field(default_factory=ExpiringDict, repr=False)
 
     _lock: threading.RLock = dc.field(default_factory=threading.RLock, init=False, repr=False)
 
@@ -127,9 +128,7 @@
         with self._lock:
             # double-check after acquiring lock
             if self._remote_metadata is None:
-                cmd = r"""import glob
-import json
-import os
+                cmd = r"""import glob, json, os
 from yggdrasil.pyutils.python_env import PythonEnv
 
 current_env = PythonEnv.get_current()
@@ -144,7 +143,6 @@ os_env = meta["os_env"] = {}
 for k, v in os.environ.items():
     os_env[k] = v
 
-meta["requirements"] = current_env.requirements()
 meta["version_info"] = current_env.version_info
 
 print(json.dumps(meta))"""
@@ -191,7 +189,7 @@ print(json.dumps(meta))"""
         """
         self.cluster.ensure_running()
 
-        logger.debug(
+        LOGGER.debug(
             "Creating Databricks command execution context for %s",
             self.cluster
         )
@@ -217,7 +215,7 @@
             The connected ExecutionContext instance.
         """
         if self.context_id is not None:
-            logger.debug(
+            LOGGER.debug(
                 "Execution context already open for %s",
                 self
             )
@@ -235,7 +233,7 @@
             raise RuntimeError("Failed to create command execution context")
 
         self.context_id = context_id
-        logger.info(
+        LOGGER.info(
             "Opened execution context for %s",
             self
         )
@@ -247,13 +245,9 @@
         Returns:
             None.
         """
-        if self.context_id is None:
+        if not self.context_id:
             return
 
-        logger.debug(
-            "Closing execution context for %s",
-            self
-        )
         try:
             self._workspace_client().command_execution.destroy(
                 cluster_id=self.cluster.cluster_id,
@@ -349,7 +343,7 @@
 
         self.connect(language=Language.PYTHON)
 
-        logger.debug(
+        LOGGER.debug(
             "Executing callable %s with %s",
             getattr(func, "__name__", type(func)),
             self,
@@ -394,12 +388,18 @@
             module_name = module_name.split(".")[0]
 
             if module_name and "yggdrasil" not in module_name:
-                self.close()
+                LOGGER.debug(
+                    "Installing missing module %s from local environment",
+                    module_name,
+                )
 
-                self.cluster.install_libraries(
+                self.install_temporary_libraries(
                     libraries=[module_name],
-                    raise_error=True,
-                    restart=True
+                )
+
+                LOGGER.warning(
+                    "Installed missing module %s from local environment",
+                    module_name,
                 )
 
                 return self.execute_callable(
```
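With this change, a `ModuleNotFoundError` raised by a remotely executed callable triggers a lightweight temporary-library upload and a retry, rather than closing the context and forcing a full cluster install-and-restart. A rough standalone sketch of the retry shape (simplified; the real method also threads through serialization, timeouts, and the command text):

```python
import re

_MOD_NOT_FOUND_RE = re.compile(r"No module named ['\"]([^'\"]+)['\"]")

def run_with_module_retry(run, install, retries=1):
    """Call run(); if a non-yggdrasil module is missing, install it and retry once."""
    try:
        return run()
    except ModuleNotFoundError as exc:
        match = _MOD_NOT_FOUND_RE.search(str(exc))
        module = match.group(1).split(".")[0] if match else None
        if retries and module and "yggdrasil" not in module:
            install(module)
            return run_with_module_retry(run, install, retries - 1)
        raise
```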
yggdrasil/databricks/compute/execution_context.py (continued)

```diff
@@ -412,6 +412,7 @@
                     timeout=timeout,
                     command=command,
                 )
+
             raise remote_module_error
 
         return result
@@ -446,29 +447,7 @@
             timeout=timeout or dt.timedelta(minutes=20)
         )
 
-        try:
-            return self._decode_result(result, result_tag=result_tag, print_stdout=print_stdout)
-        except ModuleNotFoundError as remote_module_error:
-            _MOD_NOT_FOUND_RE = re.compile(r"No module named ['\"]([^'\"]+)['\"]")
-            module_name = _MOD_NOT_FOUND_RE.search(str(remote_module_error))
-            module_name = module_name.group(1) if module_name else None
-            module_name = module_name.split(".")[0]
-
-            if module_name and "yggdrasil" not in module_name:
-                self.close()
-                self.cluster.install_libraries(
-                    libraries=[module_name],
-                    raise_error=True,
-                    restart=True
-                )
-
-                return self.execute_command(
-                    command=command,
-                    timeout=timeout,
-                    result_tag=result_tag,
-                    print_stdout=print_stdout
-                )
-            raise remote_module_error
+        return self._decode_result(result, result_tag=result_tag, print_stdout=print_stdout)
 
     # ------------------------------------------------------------------
     # generic local → remote uploader, via remote python
@@ -589,16 +568,22 @@ with zipfile.ZipFile(buf, "r") as zf:
         ]
 
         resolved = resolve_local_lib_path(libraries)
+        str_resolved = str(resolved)
+        existing = self._uploaded_package_roots.get(str_resolved)
 
-        remote_site_packages_path = self.remote_metadata.site_packages_path
-        if resolved.is_dir():
-            # site-packages/<package_name>/
-            remote_target = posixpath.join(remote_site_packages_path, resolved.name)
-        else:
-            # site-packages/<module_file>
-            remote_target = posixpath.join(remote_site_packages_path, resolved.name)
+        if not existing:
+            remote_site_packages_path = self.remote_metadata.site_packages_path
+
+            if resolved.is_dir():
+                # site-packages/<package_name>/
+                remote_target = posixpath.join(remote_site_packages_path, resolved.name)
+            else:
+                # site-packages/<module_file>
+                remote_target = posixpath.join(remote_site_packages_path, resolved.name)
+
+            self.upload_local_path(resolved, remote_target)
 
-        self.upload_local_path(resolved, remote_target)
+            self._uploaded_package_roots[str_resolved] = remote_target
 
         return libraries
 
```
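Uploads are now memoized per resolved local path in `_uploaded_package_roots` (an `ExpiringDict`), so repeated temporary installs of the same package skip the upload until the entry expires. A small sketch of the memoization idea, using a hand-rolled TTL cache as a stand-in since `ExpiringDict`'s exact API isn't shown in this diff:

```python
import time

class TTLCache:
    """Toy expiring dict: get() returns None once an entry is older than ttl."""

    def __init__(self, ttl: float = 300.0):
        self.ttl, self._data = ttl, {}

    def get(self, key):
        hit = self._data.get(key)
        if hit and time.monotonic() - hit[1] < self.ttl:
            return hit[0]
        self._data.pop(key, None)
        return None

    def __setitem__(self, key, value):
        self._data[key] = (value, time.monotonic())

uploads = TTLCache(ttl=60.0)

def upload_once(local_path: str, do_upload):
    """Upload local_path only if there is no fresh cache entry; return the remote target."""
    target = uploads.get(local_path)
    if target is None:
        target = do_upload(local_path)  # expensive: push files to the cluster
        uploads[local_path] = target    # memoize until the TTL lapses
    return target
```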
yggdrasil/databricks/sql/engine.py

```diff
@@ -130,7 +130,10 @@ class SQLEngine(WorkspaceService):
         schema_name = schema_name or self.schema_name
         return catalog_name, schema_name, table_name
 
-    def _default_warehouse(self, cluster_size: str = "Small"):
+    def _default_warehouse(
+        self,
+        cluster_size: str = "Small"
+    ):
         """Pick a default SQL warehouse (best-effort) matching the desired size.
 
         Args:
@@ -951,7 +954,7 @@ FROM parquet.`{temp_volume_path}`"""
         """Convert an Arrow Field to a Databricks SQL column DDL fragment."""
         name = field.name
         nullable_str = " NOT NULL" if put_not_null and not field.nullable else ""
-        name_str = f"{name} " if put_name else ""
+        name_str = f"`{name}` " if put_name else ""
 
         comment_str = ""
         if put_comment and field.metadata and b"comment" in field.metadata:
```
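Backtick-quoting the column name makes the generated DDL robust to reserved words and special characters in field names. A quick pyarrow illustration of the fragment this line builds (the type mapping and comment handling around it are elided; `STRING` is hardcoded here for brevity):

```python
import pyarrow as pa

field = pa.field("order", pa.string(), nullable=False)  # "order" is a SQL keyword

name_str = f"`{field.name}` "  # new behavior: quoted -> `order`
nullable_str = " NOT NULL" if not field.nullable else ""
print(f"{name_str}STRING{nullable_str}")  # -> `order` STRING NOT NULL
```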