ygg 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.16
3
+ Version: 0.1.18
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  Project-URL: Homepage, https://github.com/Platob/Yggdrasil
@@ -1,9 +1,9 @@
1
1
  yggdrasil/__init__.py,sha256=6OPibApplA5TF4TeixkQO_qewpaAidYX-fSDvvKYcTI,91
2
2
  yggdrasil/databricks/__init__.py,sha256=aGVve5mpoQtxSK2nfzrexjRPoutCIyaOnKZijkG4_QE,92
3
3
  yggdrasil/databricks/compute/__init__.py,sha256=TVDwPmW2SOmHmnhzZhsvrWbrxZ_lEcgqe3l9BeB-oxM,218
4
- yggdrasil/databricks/compute/cluster.py,sha256=Z0igibj9osfw0sLPyO9qr1ZMXdGJAlay3xWO3fpJMms,23806
5
- yggdrasil/databricks/compute/execution_context.py,sha256=742neVcZWKInhDzWoqBQmFj82Hsg0AJLTFF1iB999lw,16242
6
- yggdrasil/databricks/compute/remote.py,sha256=NPljUmHt2ZHJmCw0EAaZAEXjTgH2QYOSO9t8-z4pRx0,1177
4
+ yggdrasil/databricks/compute/cluster.py,sha256=xElDioObG6exkUS08K-Ccs_EFNbWD69Z15fjvnHwOx8,26958
5
+ yggdrasil/databricks/compute/execution_context.py,sha256=pnzA_itZiYW4LpjgWnKZlovABEjHToDAp2ahVMnZmRQ,18625
6
+ yggdrasil/databricks/compute/remote.py,sha256=DzPVPk-4bt5bOL52Onur3xLNh3UzS2K70DU5HglhGSg,1216
7
7
  yggdrasil/databricks/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  yggdrasil/databricks/jobs/config.py,sha256=8Slfw4Wl7vu0kIlaUUqVqjjOgPwuULoo0rroENCbC20,11494
9
9
  yggdrasil/databricks/sql/__init__.py,sha256=JZpQ9eCphDf1l4yzIZ7a7OLigxqXkqOgb0Mio7Rj09A,181
@@ -26,14 +26,14 @@ yggdrasil/libs/extensions/polars_extensions.py,sha256=kojeJOW5VPBXMVTJT2gWUJypNu
26
26
  yggdrasil/libs/extensions/spark_extensions.py,sha256=uCcpLJr0wEE9V_-nc8DrQtAh9n2Q4mV_NicU-DNGG54,13396
27
27
  yggdrasil/pyutils/__init__.py,sha256=QF4o345TW0wRIt_SR_VuH8AnwESFtKkksnqaDurPkbY,61
28
28
  yggdrasil/pyutils/exceptions.py,sha256=Kt4xY4jPv-Ld8vBJaiQy51xbewD6Q-3bZkaa3THsFNM,2891
29
- yggdrasil/pyutils/modules.py,sha256=FKXUD2Swdj6fuh13yj4Tid2G1q6wjbPkzEMQGTwHXUE,10051
29
+ yggdrasil/pyutils/modules.py,sha256=r3C4TRA8C_fUTltITV0VJY0pbIdxdV1op2z8BX0FgJo,10201
30
30
  yggdrasil/pyutils/parallel.py,sha256=L2r4_iumOzDk7omWVBPYmT5U8n7suOdv1AFpa_ghHSY,5902
31
31
  yggdrasil/pyutils/retry.py,sha256=1zjascEsffvnkVRdHhdeoenk6tBPrzrj4VhBltbhBeU,10390
32
32
  yggdrasil/requests/__init__.py,sha256=THJz1IoZYQccwmXcQR3N8D-uWxCkfMtgeXDhONdERR8,41
33
33
  yggdrasil/requests/msal.py,sha256=ucnN45iZZpbXkByw212PX4shH4g0EeyrW8JEmfimWtY,5861
34
34
  yggdrasil/requests/session.py,sha256=YomLcDf8O_mc8BUnf9fr5wrupDnxEzaGw-guhV91NsE,830
35
- yggdrasil/ser/__init__.py,sha256=osetghhzlABSC8dk9GDJlEwBMIhAqOTVhEDOWMHpNFg,23
36
- yggdrasil/ser/method.py,sha256=TljQm882NfHI9ca5Moa2W2dVTpG_JdjlnsjGSMxVtOQ,24026
35
+ yggdrasil/ser/__init__.py,sha256=sS66Bxu8aiLb-8N2aNayquamfi7FobEH51JyV5ULDFI,31
36
+ yggdrasil/ser/callable_serde.py,sha256=1pDgrzAceoFQ7JS7qIeSxC4hCz2oAcQOkeWyYYeT7iY,21206
37
37
  yggdrasil/types/__init__.py,sha256=p0Qu_69RkePPgQGM9nSue_bcbEIAM2u9eo3zsEplHJ8,82
38
38
  yggdrasil/types/libs.py,sha256=7-p0M4C6TnEWpUGf2nY4XshhJxtXOE_-bsYmJWU6jtk,227
39
39
  yggdrasil/types/python_arrow.py,sha256=1Ac1ZnEgmH4nLNBfrfbM6E9EfANGWtdANqlwMENQkTw,21408
@@ -48,7 +48,7 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
48
48
  yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
49
49
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
50
50
  yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
51
- ygg-0.1.16.dist-info/METADATA,sha256=2DrFcSHmSoioahWJK_YojsdaFHHuclrWmLXq3HD0Ooo,5981
52
- ygg-0.1.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
- ygg-0.1.16.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
54
- ygg-0.1.16.dist-info/RECORD,,
51
+ ygg-0.1.18.dist-info/METADATA,sha256=d_adw4j5tP0rLrz-9sgI6fL5vWp5s6KgICs7lyLctgI,5981
52
+ ygg-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ ygg-0.1.18.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
54
+ ygg-0.1.18.dist-info/RECORD,,
@@ -26,7 +26,7 @@ from ..workspaces.workspace import WorkspaceService, Workspace
26
26
  from ... import retry
27
27
  from ...libs.databrickslib import databricks_sdk
28
28
  from ...pyutils.modules import PipIndexSettings
29
- from ...ser import SerializedFunction
29
+ from ...ser import CallableSerdeMixin
30
30
 
31
31
  if databricks_sdk is None: # pragma: no cover - import guard
32
32
  ResourceDoesNotExist = Exception # type: ignore
@@ -154,10 +154,17 @@ class Cluster(WorkspaceService):
154
154
  ] if _ not in libraries
155
155
  ])
156
156
 
157
+ python_version = sys.version_info
158
+
159
+ if python_version[0] < 3:
160
+ python_version = None
161
+ elif python_version[1] < 11:
162
+ python_version = None
163
+
157
164
  inst = self.create_or_update(
158
165
  cluster_id=cluster_id,
159
166
  cluster_name=cluster_name or self.cluster_name or self.workspace.current_user.user_name,
160
- python_version=sys.version_info,
167
+ python_version=python_version,
161
168
  single_user_name=single_user_name or self.workspace.current_user.user_name,
162
169
  runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
163
170
  libraries=libraries,
@@ -172,8 +179,11 @@ class Cluster(WorkspaceService):
172
179
  self.details = self.clusters_client().get(cluster_id=self.cluster_id)
173
180
  return self._details
174
181
 
175
- def fresh_details(self, max_delay: float):
176
- if self.cluster_id and time.time() - self._details_refresh_time > max_delay:
182
+ def fresh_details(self, max_delay: float | None = None):
183
+ max_delay = max_delay or 0
184
+ delay = time.time() - self._details_refresh_time
185
+
186
+ if self.cluster_id and delay > max_delay:
177
187
  self.details = self.clusters_client().get(cluster_id=self.cluster_id)
178
188
  return self._details
179
189
 
@@ -187,14 +197,59 @@ class Cluster(WorkspaceService):
187
197
 
188
198
  @property
189
199
  def state(self):
190
- if self.cluster_id:
191
- return self.fresh_details(max_delay=10).state
200
+ details = self.fresh_details(max_delay=10)
201
+
202
+ if details is not None:
203
+ return details.state
204
+ return State.UNKNOWN
205
+
206
+ def get_state(self, max_delay: float = None):
207
+ details = self.fresh_details(max_delay=max_delay)
208
+
209
+ if details is not None:
210
+ return details.state
192
211
  return State.UNKNOWN
193
212
 
194
213
  @property
195
214
  def is_running(self):
196
215
  return self.state == State.RUNNING
197
216
 
217
+ @property
218
+ def is_pending(self):
219
+ return self.state in (State.PENDING, State.RESIZING, State.RESTARTING, State.TERMINATING)
220
+
221
+ @property
222
+ def is_error(self):
223
+ return self.state == State.ERROR
224
+
225
+ def raise_for_status(self):
226
+ if self.is_error:
227
+ raise DatabricksError("Error in %s" % self)
228
+
229
+ return self
230
+
231
+ def wait_for_status(
232
+ self,
233
+ tick: float = 0.5,
234
+ timeout: float = 600,
235
+ backoff: int = 2,
236
+ max_sleep_time: float = 15
237
+ ):
238
+ start = time.time()
239
+ sleep_time = tick
240
+
241
+ while self.is_pending:
242
+ time.sleep(sleep_time)
243
+
244
+ if time.time() - start > timeout:
245
+ raise TimeoutError("Waiting state for %s timed out")
246
+
247
+ sleep_time = min(max_sleep_time, sleep_time * backoff)
248
+
249
+ self.raise_for_status()
250
+
251
+ return self
252
+
198
253
  @property
199
254
  def spark_version(self) -> str:
200
255
  d = self.details
@@ -276,7 +331,8 @@ class Cluster(WorkspaceService):
276
331
 
277
332
  versions = [v for v in versions if py_for_key(v.key) == py_filter]
278
333
 
279
- if not versions and py_filter > 12:
334
+ # Handle superior Python versions
335
+ if not versions and py_filter[1] > 12:
280
336
  return self.spark_versions(photon=photon)
281
337
 
282
338
  return versions
@@ -402,7 +458,7 @@ class Cluster(WorkspaceService):
402
458
  libraries: Optional[List[Union[str, "Library"]]] = None,
403
459
  **cluster_spec: Any
404
460
  ) -> "Cluster":
405
- self.install_libraries(libraries=libraries, timeout=None, raise_error=False)
461
+ self.install_libraries(libraries=libraries, wait_timeout=None, raise_error=False)
406
462
 
407
463
  existing_details = {
408
464
  k: v
@@ -482,24 +538,29 @@ class Cluster(WorkspaceService):
482
538
  ) -> "Cluster":
483
539
  return self.start()
484
540
 
485
- @retry(tries=4)
486
541
  def start(
487
542
  self,
488
543
  ) -> "Cluster":
544
+ self.wait_for_status()
545
+
489
546
  if not self.is_running:
490
547
  logger.info("Starting %s", self)
491
548
  self.details = self.clusters_client().start_and_wait(cluster_id=self.cluster_id)
492
549
  return self.wait_installed_libraries()
550
+
493
551
  return self
494
552
 
495
553
  @retry(tries=4)
496
554
  def restart(
497
555
  self,
498
556
  ):
557
+ self.wait_for_status()
558
+
499
559
  if self.is_running:
500
560
  logger.info("Restarting %s", self)
501
561
  self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
502
562
  return self.wait_installed_libraries()
563
+
503
564
  return self.start()
504
565
 
505
566
  def delete(
@@ -530,15 +591,14 @@ class Cluster(WorkspaceService):
530
591
  timeout: Optional[dt.timedelta] = None,
531
592
  result_tag: Optional[str] = None,
532
593
  ):
533
- with self.execution_context(language=language) as ctx:
534
- return ctx.execute(
535
- obj=obj,
536
- args=args,
537
- kwargs=kwargs,
538
- env_keys=env_keys,
539
- timeout=timeout,
540
- result_tag=result_tag
541
- )
594
+ return self.execution_context(language=language).execute(
595
+ obj=obj,
596
+ args=args,
597
+ kwargs=kwargs,
598
+ env_keys=env_keys,
599
+ timeout=timeout,
600
+ result_tag=result_tag
601
+ )
542
602
 
543
603
  # ------------------------------------------------------------------
544
604
  # decorator that routes function calls via `execute`
@@ -551,6 +611,7 @@ class Cluster(WorkspaceService):
551
611
  env_keys: Optional[List[str]] = None,
552
612
  timeout: Optional[dt.timedelta] = None,
553
613
  result_tag: Optional[str] = None,
614
+ **options
554
615
  ):
555
616
  """
556
617
  Decorator to run a function via Workspace.execute instead of locally.
@@ -570,7 +631,7 @@ class Cluster(WorkspaceService):
570
631
  """
571
632
  def decorator(func: Callable):
572
633
  context = self.execution_context(language=language or Language.PYTHON)
573
- serialized = func if isinstance(func, SerializedFunction) else SerializedFunction.from_callable(func)
634
+ serialized = CallableSerdeMixin.from_callable(func)
574
635
 
575
636
  @functools.wraps(func)
576
637
  def wrapper(*args, **kwargs):
@@ -584,6 +645,7 @@ class Cluster(WorkspaceService):
584
645
  env_keys=env_keys,
585
646
  timeout=timeout,
586
647
  result_tag=result_tag,
648
+ **options
587
649
  )
588
650
 
589
651
  return wrapper
@@ -597,39 +659,96 @@ class Cluster(WorkspaceService):
597
659
  def install_libraries(
598
660
  self,
599
661
  libraries: Optional[List[Union[str, "Library"]]] = None,
600
- timeout: Optional[dt.timedelta] = dt.timedelta(minutes=5),
662
+ wait_timeout: Optional[dt.timedelta] = dt.timedelta(minutes=20),
601
663
  pip_settings: Optional[PipIndexSettings] = None,
602
- raise_error: bool = True
664
+ raise_error: bool = True,
665
+ restart: bool = True,
603
666
  ) -> "Cluster":
604
667
  if not libraries:
605
668
  return self
606
669
 
607
670
  wsdk = self.workspace.sdk()
608
671
 
609
- wsdk.libraries.install(
610
- cluster_id=self.cluster_id,
611
- libraries=[
612
- self._check_library(_, pip_settings=pip_settings)
613
- for _ in libraries if _
672
+ libraries = [
673
+ self._check_library(_, pip_settings=pip_settings)
674
+ for _ in libraries if _
675
+ ]
676
+
677
+ if libraries:
678
+ wsdk.libraries.install(
679
+ cluster_id=self.cluster_id,
680
+ libraries=[
681
+ self._check_library(_, pip_settings=pip_settings)
682
+ for _ in libraries if _
683
+ ]
684
+ )
685
+
686
+ if wait_timeout is not None:
687
+ self.wait_installed_libraries(
688
+ timeout=wait_timeout, pip_settings=pip_settings, raise_error=raise_error
689
+ )
690
+
691
+ return self
692
+
693
+ def installed_library_statuses(self):
694
+ return self.workspace.sdk().libraries.cluster_status(cluster_id=self.cluster_id)
695
+
696
+ def uninstall_libraries(
697
+ self,
698
+ pypi_packages: Optional[list[str]] = None,
699
+ libraries: Optional[list["Library"]] = None,
700
+ restart: bool = True
701
+ ):
702
+ if libraries is None:
703
+ to_remove = [
704
+ lib.library
705
+ for lib in self.installed_library_statuses()
706
+ if self._filter_lib(
707
+ lib,
708
+ pypi_packages=pypi_packages,
709
+ default_filter=False
710
+ )
614
711
  ]
615
- )
712
+ else:
713
+ to_remove = libraries
714
+
715
+ if to_remove:
716
+ self.workspace.sdk().libraries.uninstall(
717
+ cluster_id=self.cluster_id,
718
+ libraries=to_remove
719
+ )
616
720
 
617
- if timeout is not None:
618
- self.wait_installed_libraries(timeout=timeout, pip_settings=pip_settings, raise_error=raise_error)
721
+ if restart:
722
+ self.restart()
619
723
 
620
724
  return self
621
725
 
726
+ @staticmethod
727
+ def _filter_lib(
728
+ lib: Optional["Library"],
729
+ pypi_packages: Optional[list[str]] = None,
730
+ default_filter: bool = False
731
+ ):
732
+ if lib is None:
733
+ return False
734
+
735
+ if lib.pypi:
736
+ if lib.pypi.package and pypi_packages:
737
+ return lib.pypi.package in pypi_packages
738
+
739
+ return default_filter
740
+
622
741
  def wait_installed_libraries(
623
742
  self,
624
- timeout: dt.timedelta = dt.timedelta(minutes=5),
743
+ timeout: dt.timedelta = dt.timedelta(minutes=20),
625
744
  pip_settings: Optional[PipIndexSettings] = None,
626
- raise_error: bool = True
745
+ raise_error: bool = True,
627
746
  ):
628
747
  if not self.is_running:
629
748
  return self
630
749
 
631
- wsdk = self.workspace.sdk()
632
- statuses = list(wsdk.libraries.cluster_status(cluster_id=self.cluster_id))
750
+ statuses = list(self.installed_library_statuses())
751
+
633
752
  max_time = time.time() + timeout.total_seconds()
634
753
 
635
754
  while True:
@@ -641,6 +760,7 @@ class Cluster(WorkspaceService):
641
760
  if failed:
642
761
  if raise_error:
643
762
  raise DatabricksError("Libraries %s in %s failed to install" % (failed, self))
763
+
644
764
  logger.warning(
645
765
  "Libraries %s in %s failed to install",
646
766
  failed, self
@@ -662,7 +782,7 @@ class Cluster(WorkspaceService):
662
782
  )
663
783
 
664
784
  time.sleep(10)
665
- statuses = list(wsdk.libraries.cluster_status(cluster_id=self.cluster_id))
785
+ statuses = list(self.installed_library_statuses())
666
786
 
667
787
  return self
668
788
 
@@ -680,6 +800,8 @@ class Cluster(WorkspaceService):
680
800
  if isinstance(value, Library):
681
801
  return value
682
802
 
803
+ pip_settings = PipIndexSettings.default_settings() if pip_settings is None else pip_settings
804
+
683
805
  if isinstance(value, str):
684
806
  if os.path.exists(value):
685
807
  target_path = self.workspace.shared_cache_path(
@@ -698,11 +820,11 @@ class Cluster(WorkspaceService):
698
820
  elif value.endswith(".whl"):
699
821
  return Library(whl=value)
700
822
 
701
- # Fallback: treat as PyPI / private index package
702
- if pip_settings:
703
- repo = pip_settings.extra_index_urls[0] if pip_settings.extra_index_urls else None
704
- else:
705
- repo = None
823
+ repo = None
824
+
825
+ if pip_settings.extra_index_url:
826
+ if value.startswith("datamanagement") or value.startswith("TSSecrets") or value.startswith("tgp_"):
827
+ repo = pip_settings.extra_index_url
706
828
 
707
829
  return Library(
708
830
  pypi=PythonPyPiLibrary(