ygg 0.1.33__tar.gz → 0.1.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {ygg-0.1.33 → ygg-0.1.34}/PKG-INFO +1 -1
  2. {ygg-0.1.33 → ygg-0.1.34}/pyproject.toml +1 -1
  3. {ygg-0.1.33 → ygg-0.1.34}/src/ygg.egg-info/PKG-INFO +1 -1
  4. {ygg-0.1.33 → ygg-0.1.34}/src/ygg.egg-info/SOURCES.txt +1 -0
  5. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/compute/cluster.py +106 -57
  6. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/compute/execution_context.py +5 -2
  7. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/compute/remote.py +6 -5
  8. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/sql/engine.py +295 -321
  9. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/workspaces/workspace.py +12 -1
  10. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/callable_serde.py +27 -2
  11. ygg-0.1.34/src/yggdrasil/pyutils/expiring_dict.py +176 -0
  12. ygg-0.1.34/src/yggdrasil/version.py +1 -0
  13. ygg-0.1.33/src/yggdrasil/version.py +0 -1
  14. {ygg-0.1.33 → ygg-0.1.34}/LICENSE +0 -0
  15. {ygg-0.1.33 → ygg-0.1.34}/README.md +0 -0
  16. {ygg-0.1.33 → ygg-0.1.34}/setup.cfg +0 -0
  17. {ygg-0.1.33 → ygg-0.1.34}/src/ygg.egg-info/dependency_links.txt +0 -0
  18. {ygg-0.1.33 → ygg-0.1.34}/src/ygg.egg-info/entry_points.txt +0 -0
  19. {ygg-0.1.33 → ygg-0.1.34}/src/ygg.egg-info/requires.txt +0 -0
  20. {ygg-0.1.33 → ygg-0.1.34}/src/ygg.egg-info/top_level.txt +0 -0
  21. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/__init__.py +0 -0
  22. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/__init__.py +0 -0
  23. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  24. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  25. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/jobs/config.py +0 -0
  26. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  27. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  28. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
  29. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/sql/types.py +0 -0
  30. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
  31. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/workspaces/filesytem.py +0 -0
  32. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/workspaces/io.py +0 -0
  33. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/workspaces/path.py +0 -0
  34. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/databricks/workspaces/path_kind.py +0 -0
  35. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/dataclasses/__init__.py +0 -0
  36. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  37. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/__init__.py +0 -0
  38. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/databrickslib.py +0 -0
  39. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  40. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  41. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  42. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/pandaslib.py +0 -0
  43. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/polarslib.py +0 -0
  44. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/libs/sparklib.py +0 -0
  45. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/__init__.py +0 -0
  46. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/exceptions.py +0 -0
  47. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/modules.py +0 -0
  48. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/parallel.py +0 -0
  49. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/python_env.py +0 -0
  50. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/pyutils/retry.py +0 -0
  51. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/requests/__init__.py +0 -0
  52. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/requests/msal.py +0 -0
  53. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/requests/session.py +0 -0
  54. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/__init__.py +0 -0
  55. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/__init__.py +0 -0
  56. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
  57. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/cast_options.py +0 -0
  58. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  59. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/polars_cast.py +0 -0
  60. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  61. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/registry.py +0 -0
  62. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  63. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  64. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  65. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/libs.py +0 -0
  66. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/python_arrow.py +0 -0
  67. {ygg-0.1.33 → ygg-0.1.34}/src/yggdrasil/types/python_defaults.py +0 -0
--- ygg-0.1.33/PKG-INFO
+++ ygg-0.1.34/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.33
+Version: 0.1.34
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
--- ygg-0.1.33/pyproject.toml
+++ ygg-0.1.34/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.33"
+version = "0.1.34"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }
--- ygg-0.1.33/src/ygg.egg-info/PKG-INFO
+++ ygg-0.1.34/src/ygg.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.33
+Version: 0.1.34
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
--- ygg-0.1.33/src/ygg.egg-info/SOURCES.txt
+++ ygg-0.1.34/src/ygg.egg-info/SOURCES.txt
@@ -40,6 +40,7 @@ src/yggdrasil/libs/extensions/spark_extensions.py
 src/yggdrasil/pyutils/__init__.py
 src/yggdrasil/pyutils/callable_serde.py
 src/yggdrasil/pyutils/exceptions.py
+src/yggdrasil/pyutils/expiring_dict.py
 src/yggdrasil/pyutils/modules.py
 src/yggdrasil/pyutils/parallel.py
 src/yggdrasil/pyutils/python_env.py
--- ygg-0.1.33/src/yggdrasil/databricks/compute/cluster.py
+++ ygg-0.1.34/src/yggdrasil/databricks/compute/cluster.py
@@ -24,6 +24,7 @@ from .execution_context import ExecutionContext
 from ..workspaces.workspace import WorkspaceService, Workspace
 from ... import retry, CallableSerde
 from ...libs.databrickslib import databricks_sdk
+from ...pyutils.expiring_dict import ExpiringDict
 from ...pyutils.modules import PipIndexSettings
 from ...pyutils.python_env import PythonEnv
 
@@ -45,6 +46,31 @@ else:  # pragma: no cover - runtime fallback when SDK is missing
 __all__ = ["Cluster"]
 
 
+NAME_ID_CACHE: dict[str, ExpiringDict] = {}
+
+
+def set_cached_cluster_name(
+    host: str,
+    cluster_name: str,
+    cluster_id: str
+) -> None:
+    existing = NAME_ID_CACHE.get(host)
+
+    if not existing:
+        existing = NAME_ID_CACHE[host] = ExpiringDict(default_ttl=60)
+
+    existing[cluster_name] = cluster_id
+
+
+def get_cached_cluster_id(
+    host: str,
+    cluster_name: str,
+) -> Optional[str]:
+    existing = NAME_ID_CACHE.get(host)
+
+    return existing.get(cluster_name) if existing else None
+
+
 logger = logging.getLogger(__name__)
 
 
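Note: the helpers above lean on the new ExpiringDict added in src/yggdrasil/pyutils/expiring_dict.py (+176 lines, not shown in this diff). A minimal sketch of the interface the call sites assume — a mapping whose entries expire default_ttl seconds after they are written — would be:

import time
from typing import Any


class ExpiringDict:
    """Sketch of a TTL mapping; the real class is larger and not shown here."""

    def __init__(self, default_ttl: float = 60):
        self.default_ttl = default_ttl
        self._data: dict[Any, tuple[float, Any]] = {}

    def __setitem__(self, key: Any, value: Any) -> None:
        # Store the value together with its expiry deadline.
        self._data[key] = (time.monotonic() + self.default_ttl, value)

    def get(self, key: Any, default: Any = None) -> Any:
        item = self._data.get(key)
        if item is None:
            return default
        expires_at, value = item
        if time.monotonic() >= expires_at:
            # Entry outlived its TTL: drop it and report a miss.
            del self._data[key]
            return default
        return value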
@@ -84,6 +110,7 @@ class Cluster(WorkspaceService):
 
     _details: Optional["ClusterDetails"] = dataclasses.field(default=None, repr=False)
     _details_refresh_time: float = dataclasses.field(default=0, repr=False)
+    _system_context: Optional[ExecutionContext] = None
 
     # host → Cluster instance
     _env_clusters: ClassVar[Dict[str, "Cluster"]] = {}
@@ -98,10 +125,11 @@ class Cluster(WorkspaceService):
         """Return the current cluster name."""
         return self.cluster_name
 
-    def __post_init__(self):
-        """Initialize cached details after dataclass construction."""
-        if self._details is not None:
-            self.details = self._details
+    @property
+    def system_context(self):
+        if self._system_context is None:
+            self._system_context = self.context(language=Language.PYTHON)
+        return self._system_context
 
     def is_in_databricks_environment(self):
         """Return True when running on a Databricks runtime."""
@@ -233,9 +261,8 @@ class Cluster(WorkspaceService):
         Returns:
             The updated PythonEnv instance.
         """
-        with self.context() as c:
-            m = c.remote_metadata
-            version_info = m.version_info
+        m = self.system_context.remote_metadata
+        version_info = m.version_info
 
         python_version = ".".join(str(_) for _ in version_info)
 
@@ -258,7 +285,7 @@
         )
 
         return target
-
+    
     @property
     def details(self):
         """Return cached cluster details, refreshing when needed."""
@@ -300,21 +327,6 @@ class Cluster(WorkspaceService):
             return details.state
         return State.UNKNOWN
 
-    def get_state(self, max_delay: float = None):
-        """Return the cluster state with a custom refresh delay.
-
-        Args:
-            max_delay: Maximum age in seconds before refresh.
-
-        Returns:
-            The current cluster state.
-        """
-        details = self.fresh_details(max_delay=max_delay)
-
-        if details is not None:
-            return details.state
-        return State.UNKNOWN
-
     @property
     def is_running(self):
         """Return True when the cluster is running."""
@@ -323,7 +335,10 @@
     @property
     def is_pending(self):
         """Return True when the cluster is starting, resizing, or terminating."""
-        return self.state in (State.PENDING, State.RESIZING, State.RESTARTING, State.TERMINATING)
+        return self.state in (
+            State.PENDING, State.RESIZING, State.RESTARTING,
+            State.TERMINATING
+        )
 
     @property
     def is_error(self):
@@ -507,45 +522,51 @@
     ):
         pip_settings = PipIndexSettings.default_settings()
 
-        if kwargs:
-            details = ClusterDetails(**{
-                **details.as_shallow_dict(),
-                **kwargs
-            })
+        new_details = ClusterDetails(**{
+            **details.as_shallow_dict(),
+            **kwargs
+        })
+
+        default_tags = self.workspace.default_tags()
+
+        if new_details.custom_tags is None:
+            new_details.custom_tags = default_tags
+        elif default_tags:
+            new_tags = new_details.custom_tags.copy()
+            new_tags.update(default_tags)
 
-        if details.custom_tags is None:
-            details.custom_tags = self.workspace.default_tags()
+            new_details.custom_tags = new_tags
 
-        if details.cluster_name is None:
-            details.cluster_name = self.workspace.current_user.user_name
+        if new_details.cluster_name is None:
+            new_details.cluster_name = self.workspace.current_user.user_name
 
-        if details.spark_version is None or python_version:
-            details.spark_version = self.latest_spark_version(
+        if new_details.spark_version is None or python_version:
+            new_details.spark_version = self.latest_spark_version(
                 photon=False, python_version=python_version
             ).key
 
-        if details.single_user_name:
-            if not details.data_security_mode:
-                details.data_security_mode = DataSecurityMode.DATA_SECURITY_MODE_DEDICATED
+        if new_details.single_user_name:
+            if not new_details.data_security_mode:
+                new_details.data_security_mode = DataSecurityMode.DATA_SECURITY_MODE_DEDICATED
 
-        if not details.node_type_id:
-            details.node_type_id = "rd-fleet.xlarge"
+        if not new_details.node_type_id:
+            new_details.node_type_id = "rd-fleet.xlarge"
 
-        if getattr(details, "virtual_cluster_size", None) is None and details.num_workers is None and details.autoscale is None:
-            if details.is_single_node is None:
-                details.is_single_node = True
+        if getattr(new_details, "virtual_cluster_size", None) is None and new_details.num_workers is None and new_details.autoscale is None:
+            if new_details.is_single_node is None:
+                new_details.is_single_node = True
 
-        if details.is_single_node is not None and details.kind is None:
-            details.kind = Kind.CLASSIC_PREVIEW
+        if new_details.is_single_node is not None and new_details.kind is None:
+            new_details.kind = Kind.CLASSIC_PREVIEW
 
         if pip_settings.extra_index_urls:
-            if details.spark_env_vars is None:
-                details.spark_env_vars = {}
+            if new_details.spark_env_vars is None:
+                new_details.spark_env_vars = {}
             str_urls = " ".join(pip_settings.extra_index_urls)
-            details.spark_env_vars["UV_EXTRA_INDEX_URL"] = details.spark_env_vars.get("UV_INDEX", str_urls)
-            details.spark_env_vars["PIP_EXTRA_INDEX_URL"] = details.spark_env_vars.get("PIP_EXTRA_INDEX_URL", str_urls)
+            new_details.spark_env_vars["UV_EXTRA_INDEX_URL"] = new_details.spark_env_vars.get("UV_INDEX", str_urls)
+            new_details.spark_env_vars["PIP_EXTRA_INDEX_URL"] = new_details.spark_env_vars.get("PIP_EXTRA_INDEX_URL", str_urls)
 
-        return details
+        return new_details
 
     def create_or_update(
         self,
@@ -658,7 +679,9 @@ class Cluster(WorkspaceService):
         )
 
         self.wait_for_status()
-        self.details = self.clusters_client().edit_and_wait(**update_details)
+        self.details = retry(tries=4, delay=0.5, max_delay=2)(
+            self.clusters_client().edit_and_wait
+        )(**update_details)
 
         logger.info(
             "Updated %s",
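Note: retry here is the decorator factory imported near the top of cluster.py (from ... import retry, CallableSerde; implemented in src/yggdrasil/pyutils/retry.py, unchanged in this release). The new code applies it to the bound SDK method and invokes the wrapper immediately. A sketch of the pattern, assuming a conventional doubling backoff — only tries, delay, and max_delay are visible at this call site:

import functools
import time


def retry_sketch(tries: int, delay: float, max_delay: float):
    """Hypothetical stand-in for yggdrasil's retry(); shown only to
    illustrate the factory -> decorator -> call chain used above."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            for attempt in range(tries):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == tries - 1:
                        raise
                    time.sleep(wait)
                    wait = min(wait * 2, max_delay)
        return wrapper
    return decorator


# Equivalent shape to the call in the hunk above:
# retry(tries=4, delay=0.5, max_delay=2)(client.edit_and_wait)(**update_details)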
@@ -704,6 +727,12 @@
         if not cluster_name and not cluster_id:
             raise ValueError("Either name or cluster_id must be provided")
 
+        if not cluster_id:
+            cluster_id = get_cached_cluster_id(
+                host=self.workspace.safe_host,
+                cluster_name=cluster_name
+            )
+
         if cluster_id:
             try:
                 details = self.clusters_client().get(cluster_id=cluster_id)
@@ -716,10 +745,13 @@
                 workspace=self.workspace, cluster_id=details.cluster_id, _details=details
             )
 
-        cluster_name_cf = cluster_name.casefold()
-
         for cluster in self.list_clusters():
-            if cluster_name_cf == cluster.details.cluster_name.casefold():
+            if cluster_name == cluster.details.cluster_name:
+                set_cached_cluster_name(
+                    host=self.workspace.safe_host,
+                    cluster_name=cluster.cluster_name,
+                    cluster_id=cluster.cluster_id
+                )
                 return cluster
 
         if raise_error:
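Note: the two hunks above give name-based lookups a short-lived fast path. The first lookup for a name scans list_clusters() and records the match via set_cached_cluster_name; lookups within the next 60 seconds (the default_ttl set when the per-host cache is created) resolve the id from NAME_ID_CACHE and go straight to clusters_client().get(). A usage sketch — the enclosing method's name is not visible in these hunks, so cluster_service.get_cluster is a hypothetical stand-in:

# First call: misses the cache, scans list_clusters(), caches name -> id.
cluster = cluster_service.get_cluster(cluster_name="nightly-etl")

# Within ~60 s: get_cached_cluster_id() returns the id, so the lookup
# becomes a single clusters_client().get(cluster_id=...) round trip.
cluster = cluster_service.get_cluster(cluster_name="nightly-etl")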
@@ -812,6 +844,7 @@ class Cluster(WorkspaceService):
         env_keys: Optional[List[str]] = None,
         timeout: Optional[dt.timedelta] = None,
         result_tag: Optional[str] = None,
+        context: Optional[ExecutionContext] = None,
     ):
         """Execute a command or callable on the cluster.
 
@@ -823,11 +856,14 @@
             env_keys: Optional environment variable names to pass.
             timeout: Optional timeout for execution.
             result_tag: Optional result tag for parsing output.
+            context: Optional ExecutionContext to run in; defaults to the shared system context.
 
         Returns:
             The decoded result from the execution context.
         """
-        return self.context(language=language).execute(
+        context = self.system_context if context is None else context
+
+        return context.execute(
             obj=obj,
             args=args,
             kwargs=kwargs,
@@ -849,6 +885,7 @@
         timeout: Optional[dt.timedelta] = None,
         result_tag: Optional[str] = None,
         force_local: bool = False,
+        context: Optional[ExecutionContext] = None,
         **options
     ):
         """
@@ -875,16 +912,28 @@
             timeout: Optional timeout for remote execution.
             result_tag: Optional tag for parsing remote output.
             force_local: force local execution
+            context: Optional ExecutionContext to run in; defaults to the shared system context.
             **options: Additional execution options passed through.
 
         Returns:
             A decorator or wrapped function that executes remotely.
         """
+        if force_local or self.is_in_databricks_environment():
+            # Support both @ws.remote and @ws.remote(...)
+            if _func is not None and callable(_func):
+                return _func
+
+            def identity(x):
+                return x
+
+            return identity
+
+        context = self.system_context if context is None else context
+
         def decorator(func: Callable):
             if force_local or self.is_in_databricks_environment():
                 return func
 
-            context = self.context(language=language or Language.PYTHON)
             serialized = CallableSerde.from_callable(func)
 
             @functools.wraps(func)
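Note: hoisting the force_local / Databricks-runtime guard out of decorator means remote() now returns the original function (or a bare identity decorator) before any execution context is touched, and it accepts both decorator spellings. A usage sketch, assuming a configured Cluster instance named cluster:

import datetime as dt

# Bare form: the function arrives through _func and is returned
# unchanged when already running on Databricks (or force_local=True).
@cluster.remote
def add(a, b):
    return a + b

# Parenthesized form: remote(...) builds the decorator.
@cluster.remote(timeout=dt.timedelta(minutes=5))
def mul(a, b):
    return a * b

# Off-cluster, the callable is serialized with CallableSerde and runs
# on the shared system_context; on-cluster, it simply runs locally.
result = add(1, 2)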
@@ -1111,7 +1160,7 @@
             )
 
             with open(value, mode="rb") as f:
-                target_path.write_bytes(f.read())
+                target_path.open().write_all_bytes(f.read())
 
             value = str(target_path)
         elif "." in value and not "/" in value:
--- ygg-0.1.33/src/yggdrasil/databricks/compute/execution_context.py
+++ ygg-0.1.34/src/yggdrasil/databricks/compute/execution_context.py
@@ -367,6 +367,8 @@ print(json.dumps(meta))"""
             args=args,
             kwargs=kwargs,
             result_tag=result_tag,
+            env_keys=env_keys,
+            env_variables=env_variables
         ) if not command else command
 
         raw_result = self.execute_command(
@@ -382,8 +384,9 @@ print(json.dumps(meta))"""
         module_name = module_name.group(1) if module_name else None
         module_name = module_name.split(".")[0]
 
-        if module_name:
+        if module_name and "yggdrasil" not in module_name:
             self.close()
+
             self.cluster.install_libraries(
                 libraries=[module_name],
                 raise_error=True,
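Note: this hunk and the next belong to execution_context.py's auto-install recovery path: when remote execution fails on a missing import, the module name is parsed out of the error text and installed on the cluster before retrying. The added guard stops the context from closing and reinstalling yggdrasil itself, since that package is the one orchestrating the retry. A condensed sketch of the name extraction — the exact regex is not shown in this diff and is assumed here:

import re
from typing import Optional


def missing_module_from_error(error_text: str) -> Optional[str]:
    """Sketch: pull the top-level module name out of a
    ModuleNotFoundError message, as the surrounding code does."""
    match = re.search(r"No module named '([^']+)'", error_text)
    if not match:
        return None
    return match.group(1).split(".")[0]


name = missing_module_from_error("No module named 'polars.io'")
if name and "yggdrasil" not in name:
    print(f"would install {name}")  # -> would install polars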
@@ -442,7 +445,7 @@ print(json.dumps(meta))"""
         module_name = module_name.group(1) if module_name else None
         module_name = module_name.split(".")[0]
 
-        if module_name:
+        if module_name and "yggdrasil" not in module_name:
             self.close()
             self.cluster.install_libraries(
                 libraries=[module_name],
--- ygg-0.1.33/src/yggdrasil/databricks/compute/remote.py
+++ ygg-0.1.34/src/yggdrasil/databricks/compute/remote.py
@@ -14,6 +14,12 @@ if TYPE_CHECKING:
 
 from ..workspaces.workspace import Workspace
 
+
+__all__ = [
+    "databricks_remote_compute"
+]
+
+
 ReturnType = TypeVar("ReturnType")
 
 logger = logging.getLogger(__name__)
@@ -70,8 +76,3 @@ def databricks_remote_compute(
         timeout=timeout,
         **options
     )
-
-
-__all__ = [
-    "databricks_remote_compute",
-]