ygg 0.1.57__py3-none-any.whl → 0.1.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/METADATA +2 -2
- ygg-0.1.64.dist-info/RECORD +74 -0
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +87 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +19 -0
- yggdrasil/databricks/compute/execution_context.py +491 -282
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +178 -178
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +339 -92
- yggdrasil/databricks/workspaces/io.py +185 -40
- yggdrasil/databricks/workspaces/path.py +114 -100
- yggdrasil/databricks/workspaces/workspace.py +210 -61
- yggdrasil/exceptions.py +7 -0
- yggdrasil/libs/databrickslib.py +22 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -2
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +6 -7
- yggdrasil/pyutils/python_env.py +16 -21
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/requests/msal.py +9 -96
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/types/file_format.py +6 -2
- yggdrasil/types/python_defaults.py +92 -76
- yggdrasil/version.py +1 -1
- ygg-0.1.57.dist-info/RECORD +0 -66
- yggdrasil/databricks/ai/loki.py +0 -53
- {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/WHEEL +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/top_level.txt +0 -0
- /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
yggdrasil/databricks/workspaces/workspace.py CHANGED

@@ -4,29 +4,40 @@ import dataclasses
 import logging
 import os
 import posixpath
+import time
 from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
+from threading import Thread
 from typing import (
     BinaryIO,
     Iterator,
     Optional,
-    Union, TYPE_CHECKING, List
+    Union, TYPE_CHECKING, List, Set
 )

-if TYPE_CHECKING:
-    from ..compute.cluster import Cluster
-
 from .path import DatabricksPath, DatabricksPathKind
+from ...libs.databrickslib import databricks_sdk, WorkspaceClient, DatabricksDummyClass
+from ...pyutils.expiring_dict import ExpiringDict
+from ...pyutils.waiting_config import WaitingConfig, WaitingConfigArg
 from ...version import __version__ as YGGDRASIL_VERSION
-from ...libs.databrickslib import require_databricks_sdk, databricks_sdk

 if databricks_sdk is not None:
-    from databricks.sdk import WorkspaceClient
     from databricks.sdk.errors import ResourceDoesNotExist
     from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
     from databricks.sdk.dbutils import FileInfo
     from databricks.sdk.service.files import DirectoryEntry
+else:
+    ResourceDoesNotExist = DatabricksDummyClass
+    ExportFormat = DatabricksDummyClass
+    ObjectInfo = DatabricksDummyClass
+    FileInfo = DatabricksDummyClass
+    DirectoryEntry = DatabricksDummyClass
+
+
+if TYPE_CHECKING:
+    from ..sql.warehouse import SQLWarehouse
+    from ..compute.cluster import Cluster


 __all__ = [

@@ -36,7 +47,26 @@ __all__ = [
 ]


-
+LOGGER = logging.getLogger(__name__)
+CHECKED_TMP_WORKSPACES: ExpiringDict[str, Set[str]] = ExpiringDict()
+
+def is_checked_tmp_path(
+    host: str,
+    base_path: str
+):
+    existing = CHECKED_TMP_WORKSPACES.get(host)
+
+    if existing is None:
+        CHECKED_TMP_WORKSPACES[host] = set(base_path)
+
+        return False
+
+    if base_path in existing:
+        return True
+
+    existing.add(base_path)
+
+    return False


 # ---------------------------------------------------------------------------

@@ -47,7 +77,7 @@ def _get_env_product():

     if not v:
         return "yggdrasil"
-    return v
+    return v


 def _get_env_product_version():

@@ -57,7 +87,7 @@ def _get_env_product_version():
         if _get_env_product() == "yggdrasil":
             return YGGDRASIL_VERSION
         return None
-    return v
+    return v


 def _get_env_product_tag():

@@ -65,7 +95,7 @@ def _get_env_product_tag():

     if not v:
         return None
-    return v
+    return v


 @dataclass

@@ -115,8 +145,12 @@ class Workspace:
     _cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)

     # -------------------------
-    #
+    # Python methods
     # -------------------------
+    def __post_init__(self):
+        self.product = self.product.strip().lower() if self.product else "yggdrasil"
+        self.product_tag = self.product_tag.strip().lower() if self.product_tag else "main"
+
     def __getstate__(self):
         """Serialize the workspace state for pickling.

@@ -245,8 +279,6 @@ class Workspace:

         instance = self.clone_instance() if clone else self

-        require_databricks_sdk()
-
         # Build Config from config_dict if available, else from fields.
         kwargs = {
             "host": instance.host,

@@ -357,7 +389,7 @@ class Workspace:
     @property
     def safe_host(self):
         if not self.host:
-
+            self.host = self.sdk().config.host
         return self.host

     @property

@@ -392,7 +424,7 @@ class Workspace:
     # ------------------------------------------------------------------ #
     # Path helpers
     # ------------------------------------------------------------------ #
-    def
+    def filesystem(
         self,
         workspace: Optional["Workspace"] = None,
     ):

@@ -418,7 +450,8 @@ class Workspace:
         self,
         parts: Union[List[str], str],
         kind: Optional[DatabricksPathKind] = None,
-        workspace: Optional["Workspace"] = None
+        workspace: Optional["Workspace"] = None,
+        temporary: bool = False
     ):
         """Create a DatabricksPath in this workspace.

@@ -426,6 +459,7 @@ class Workspace:
             parts: Path parts or string to parse.
             kind: Optional path kind override.
             workspace: Optional workspace override.
+            temporary: Temporary path

         Returns:
             A DatabricksPath instance.

@@ -435,15 +469,137 @@ class Workspace:
         if kind is None or isinstance(parts, str):
             return DatabricksPath.parse(
                 obj=parts,
-                workspace=workspace
+                workspace=workspace,
+                temporary=temporary
             )

         return DatabricksPath(
             kind=kind,
             parts=parts,
+            temporary=temporary,
             _workspace=workspace
         )

+    @staticmethod
+    def _base_tmp_path(
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
+    ) -> str:
+        if catalog_name and schema_name:
+            base_path = "/Volumes/%s/%s/%s" % (
+                catalog_name, schema_name, volume_name or "tmp"
+            )
+        else:
+            base_path = "/Workspace/Shared/.ygg/tmp"
+
+        return base_path
+
+    def tmp_path(
+        self,
+        suffix: Optional[str] = None,
+        extension: Optional[str] = None,
+        max_lifetime: Optional[float] = None,
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
+        base_path: Optional[str] = None,
+    ) -> DatabricksPath:
+        """
+        Shared cache base under Volumes for the current user.
+
+        Args:
+            suffix: Optional suffix
+            extension: Optional extension suffix to append.
+            max_lifetime: Max lifetime of temporary path
+            catalog_name: Unity catalog name for volume path
+            schema_name: Unity schema name for volume path
+            volume_name: Unity volume name for volume path
+            base_path: Base temporary path
+
+        Returns:
+            A DatabricksPath pointing at the shared cache location.
+        """
+        start = int(time.time())
+        max_lifetime = int(max_lifetime or 48 * 3600)
+        end = max(0, int(start + max_lifetime))
+
+        base_path = base_path or self._base_tmp_path(
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            volume_name=volume_name
+        )
+
+        rnd = os.urandom(4).hex()
+        temp_path = f"tmp-{start}-{end}-{rnd}"
+
+        if suffix:
+            temp_path += suffix
+
+        if extension:
+            temp_path += ".%s" % extension
+
+        self.clean_tmp_folder(
+            raise_error=False,
+            wait=False,
+            base_path=base_path
+        )
+
+        return self.dbfs_path(f"{base_path}/{temp_path}")
+
+    def clean_tmp_folder(
+        self,
+        raise_error: bool = True,
+        wait: Optional[WaitingConfigArg] = True,
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
+        base_path: Optional[str] = None,
+    ):
+        wait = WaitingConfig.check_arg(wait)
+
+        base_path = base_path or self._base_tmp_path(
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            volume_name=volume_name
+        )
+
+        if is_checked_tmp_path(host=self.safe_host, base_path=base_path):
+            return self
+
+        if wait.timeout:
+            base_path = self.dbfs_path(base_path)
+
+            LOGGER.debug(
+                "Cleaning temp path %s",
+                base_path
+            )
+
+            for path in base_path.ls(recursive=False, allow_not_found=True):
+                if path.name.startswith("tmp"):
+                    parts = path.name.split("-")
+
+                    if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
+                        end = int(parts[2])
+
+                        if end and time.time() > end:
+                            path.remove(recursive=True)
+
+            LOGGER.info(
+                "Cleaned temp path %s",
+                base_path
+            )
+        else:
+            Thread(
+                target=self.clean_tmp_folder,
+                kwargs={
+                    "raise_error": raise_error,
+                    "base_path": base_path
+                }
+            ).start()
+
+        return self
+
     def shared_cache_path(
         self,
         suffix: Optional[str] = None
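Note on the temporary-path scheme added above: nothing is tracked server-side. Each entry's creation and expiry epochs are encoded in its name (`tmp-{start}-{end}-{rnd}`), and `clean_tmp_folder` reparses names on later calls, so any client can garbage-collect entries another client created. A standalone sketch of that naming contract (values hypothetical, not package code):

```python
import time

# Mirror of the tmp_path naming scheme: tmp-<created>-<expires>-<hex>[suffix][.ext]
created = int(time.time())
expires = created + 48 * 3600                # default max_lifetime is 48 hours
name = f"tmp-{created}-{expires}-a1b2c3d4"   # "a1b2c3d4" stands in for os.urandom(4).hex()

# The same validity check clean_tmp_folder applies to each directory entry:
parts = name.split("-")
is_expired = (
    len(parts) > 2
    and parts[0] == "tmp"
    and parts[1].isdigit()
    and parts[2].isdigit()
    and time.time() > int(parts[2])
)
print(is_expired)  # False for a freshly minted 48-hour entry
```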
@@ -469,13 +625,13 @@ class Workspace:
     # SDK access / connection
     # ------------------------------------------------------------------ #

-    def sdk(self) ->
+    def sdk(self) -> WorkspaceClient:
         """Return the connected WorkspaceClient.

         Returns:
             The WorkspaceClient instance.
         """
-        return self.connect()._sdk
+        return self.connect(clone=False)._sdk

     # ------------------------------------------------------------------ #
     # List / open / delete / SQL

@@ -529,14 +685,15 @@ class Workspace:
                 yield from self.list_path(child_path, recursive=True)
             return

-
-
-
-
-
+        else:
+            # Workspace files / notebooks
+            try:
+                entries = list(sdk.workspace.list(path, recursive=recursive))
+            except ResourceDoesNotExist:
+                return

-
-
+            for obj in entries:
+                yield obj

     def open_path(
         self,

@@ -582,20 +739,29 @@ class Workspace:
         """
         return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None

-    def default_tags(self):
+    def default_tags(self, update: bool = True):
         """Return default resource tags for Databricks assets.

         Returns:
             A dict of default tags.
         """
-
-
-
-
-
-
-
-
+        if update:
+            base = {
+                k: v
+                for k, v in (
+                    ("Product", self.product),
+                )
+                if v
+            }
+        else:
+            base = {
+                k: v
+                for k, v in (
+                    ("Product", self.product),
+                    ("ProductTag", self.product_tag),
+                )
+                if v
+            }

         if self.custom_tags:
             base.update(self.custom_tags)
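A hedged usage sketch of the `__post_init__` normalization and the new `update` flag on `default_tags`, assuming `Workspace` exposes `host`, `product`, `product_tag`, and `custom_tags` as dataclass fields (values hypothetical):

```python
from yggdrasil.databricks.workspaces.workspace import Workspace

# __post_init__ strips and lowercases product/product_tag, with
# "yggdrasil" and "main" as the respective fallbacks.
ws = Workspace(host="https://example.cloud.databricks.com", product="  MyApp ")

ws.default_tags()              # {"Product": "myapp"}, merged with custom_tags if set
ws.default_tags(update=False)  # {"Product": "myapp", "ProductTag": "main"}, likewise
```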
@@ -605,7 +771,7 @@ class Workspace:
     def sql(
         self,
         workspace: Optional["Workspace"] = None,
-
+        warehouse: Optional["SQLWarehouse"] = None,
         catalog_name: Optional[str] = None,
         schema_name: Optional[str] = None,
     ):

@@ -613,7 +779,7 @@ class Workspace:

         Args:
             workspace: Optional workspace override.
-
+            warehouse: Optional SQL warehouse.
             catalog_name: Optional catalog name.
             schema_name: Optional schema name.


@@ -622,11 +788,13 @@ class Workspace:
         """
         from ..sql import SQLEngine

+        workspace = self if workspace is None else workspace
+
         return SQLEngine(
-            workspace=
-            warehouse_id=warehouse_id,
+            workspace=workspace,
             catalog_name=catalog_name,
             schema_name=schema_name,
+            _warehouse=warehouse,
         )

     def warehouses(
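A minimal sketch of the reworked `sql()` entry point: the old `warehouse_id` argument is replaced by an optional `SQLWarehouse` helper forwarded to `SQLEngine` as the private `_warehouse` field (hypothetical usage; SQLEngine internals are not shown in this diff):

```python
from yggdrasil.databricks.workspaces.workspace import Workspace

ws = Workspace(host="https://example.cloud.databricks.com")

# With warehouse=None, SQLEngine receives _warehouse=None and presumably
# resolves a warehouse on its own; passing warehouse=<SQLWarehouse> pins one.
engine = ws.sql(catalog_name="main", schema_name="default")
```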
@@ -652,9 +820,9 @@ class Workspace:
         """Return a Cluster helper bound to this workspace.

         Args:
+            workspace: Optional workspace override.
             cluster_id: Optional cluster id.
             cluster_name: Optional cluster name.
-            **kwargs: Additional Cluster parameters.

         Returns:
             A Cluster instance.

@@ -662,21 +830,11 @@ class Workspace:
         from ..compute.cluster import Cluster

         return Cluster(
-            workspace=self,
+            workspace=self if workspace is None else workspace,
             cluster_id=cluster_id,
             cluster_name=cluster_name,
         )

-    def loki(
-        self,
-        workspace: Optional["Workspace"] = None,
-    ):
-        from ..ai.loki import Loki
-
-        return Loki(
-            workspace=self,
-        )
-
 # ---------------------------------------------------------------------------
 # Workspace-bound base class
 # ---------------------------------------------------------------------------

@@ -689,15 +847,6 @@ class WorkspaceService(ABC):
     """Base class for helpers that depend on a Workspace."""
     workspace: Workspace = dataclasses.field(default_factory=Workspace)

-    def __post_init__(self):
-        """Ensure a Workspace instance is available.
-
-        Returns:
-            None.
-        """
-        if self.workspace is None:
-            self.workspace = Workspace()
-
     def __enter__(self):
         """Enter a context manager and connect the workspace.


@@ -742,7 +891,7 @@ class WorkspaceService(ABC):
         parts: Union[List[str], str],
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
-    ):
+    ) -> "DatabricksPath":
         """Create a DatabricksPath in the underlying workspace.

         Args:
yggdrasil/exceptions.py ADDED

yggdrasil/libs/databrickslib.py CHANGED

@@ -1,24 +1,13 @@
 """Optional Databricks SDK dependency helpers."""
+from ..types.dummy_class import DummyModuleClass

-try:
-    import databricks
-    import databricks.sdk  # type: ignore

-
+class DatabricksDummyClass(DummyModuleClass):
+    """Placeholder object that raises if Databricks SDK is required."""

-
-
-
-class _DatabricksDummy:
-    """Placeholder object that raises if Databricks SDK is required."""
-    def __getattr__(self, item):
-        """Raise an error when accessing missing Databricks SDK attributes."""
-        require_databricks_sdk()
-
-databricks = _DatabricksDummy
-databricks_sdk = _DatabricksDummy
-
-WorkspaceClient = _DatabricksDummy
+    @classmethod
+    def module_name(cls) -> str:
+        return "databricks"


 def require_databricks_sdk():

@@ -34,9 +23,24 @@ def require_databricks_sdk():
         )


+try:
+    import databricks
+    import databricks.sdk
+
+    from databricks.sdk import WorkspaceClient
+
+    databricks = databricks
+    databricks_sdk = databricks.sdk
+except ImportError:
+    databricks = DatabricksDummyClass
+    databricks_sdk = DatabricksDummyClass
+    WorkspaceClient = DatabricksDummyClass
+
+
 __all__ = [
     "databricks",
     "databricks_sdk",
     "require_databricks_sdk",
-    "WorkspaceClient"
+    "WorkspaceClient",
+    "DatabricksDummyClass"
 ]
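`DummyModuleClass` lives in the new `yggdrasil/types/dummy_class.py`, which this diff only lists, so the following is a speculative sketch of the pattern being centralized, not the library's actual implementation: each optional-dependency module declares its module name once, and the shared base fails only when a placeholder is actually used.

```python
# Speculative sketch; the real DummyModuleClass and its exact hooks are not shown here.
class DummyModuleClass:
    @classmethod
    def module_name(cls) -> str:
        raise NotImplementedError

    def __init__(self, *args, **kwargs):
        # Importing the wrapper module stays cheap; *using* a placeholder
        # (here, instantiating it) is what raises.
        name = self.module_name()
        raise ImportError(
            f"{name} is required to use this feature. "
            f"Install it with `pip install {name}`."
        )


class DatabricksDummyClass(DummyModuleClass):
    @classmethod
    def module_name(cls) -> str:
        return "databricks"


try:
    DatabricksDummyClass()
except ImportError as exc:
    print(exc)  # databricks is required to use this feature. ...
```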
yggdrasil/libs/extensions/spark_extensions.py CHANGED

@@ -7,7 +7,6 @@ from typing import List, Union, Optional, Iterable, Callable, TYPE_CHECKING, Map

 import pyarrow as pa

-from ..pandaslib import pandas
 from ..sparklib import (
     pyspark,
     SparkDataFrame,

@@ -18,6 +17,7 @@ from ..sparklib import (
 from ...types.cast.registry import convert

 if TYPE_CHECKING:  # pragma: no cover
+    from ..pandaslib import pandas
     from ...types.cast.cast_options import CastOptions

 if pyspark is not None:
|
yggdrasil/libs/pandaslib.py
CHANGED
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
"""Optional pandas dependency helpers."""
|
|
2
2
|
|
|
3
|
+
from ..types.dummy_class import DummyModuleClass
|
|
4
|
+
|
|
5
|
+
class DummyPandasClass(DummyModuleClass):
|
|
6
|
+
|
|
7
|
+
@classmethod
|
|
8
|
+
def module_name(cls) -> str:
|
|
9
|
+
return "pandas"
|
|
10
|
+
|
|
11
|
+
|
|
3
12
|
try:
|
|
4
13
|
import pandas # type: ignore
|
|
5
|
-
pandas = pandas
|
|
6
|
-
|
|
7
14
|
PandasDataFrame = pandas.DataFrame
|
|
15
|
+
PandasSeries = pandas.Series
|
|
8
16
|
except ImportError:
|
|
9
17
|
pandas = None
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
pass
|
|
18
|
+
PandasDataFrame = DummyPandasClass
|
|
19
|
+
PandasSeries = DummyPandasClass
|
|
13
20
|
|
|
14
21
|
|
|
15
22
|
def require_pandas():
|
|
@@ -28,5 +35,7 @@ def require_pandas():
|
|
|
28
35
|
__all__ = [
|
|
29
36
|
"pandas",
|
|
30
37
|
"require_pandas",
|
|
31
|
-
"PandasDataFrame"
|
|
38
|
+
"PandasDataFrame",
|
|
39
|
+
"PandasSeries",
|
|
40
|
+
"DummyPandasClass",
|
|
32
41
|
]
|
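One practical effect of binding `PandasDataFrame`/`PandasSeries` to a dummy class instead of the old bare `pass` branch: downstream annotations and `isinstance` checks keep working when pandas is absent. A caller-side sketch, not package code:

```python
from yggdrasil.libs.pandaslib import pandas, PandasDataFrame

def maybe_describe(obj):
    # With pandas missing, PandasDataFrame is DummyPandasClass, so this
    # isinstance check is simply False rather than a NameError.
    if pandas is not None and isinstance(obj, PandasDataFrame):
        return obj.describe()
    return obj

print(maybe_describe([1, 2, 3]))  # falls through untouched without pandas
```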
yggdrasil/libs/polarslib.py CHANGED

@@ -1,4 +1,13 @@
 """Optional Polars dependency helpers."""
+from yggdrasil.types.dummy_class import DummyModuleClass
+
+
+class DummyPolarsClass(DummyModuleClass):
+
+    @classmethod
+    def module_name(cls) -> str:
+        return "polars"
+

 try:
     import polars  # type: ignore

@@ -6,27 +15,54 @@ try:
     polars = polars

     PolarsDataFrame = polars.DataFrame
+    PolarsSeries = polars.Series
+    PolarsExpr = polars.Expr
+    PolarsDataFrame = polars.DataFrame
+    PolarsField = polars.Field
+    PolarsSchema = polars.Schema
+    PolarsDataType = polars.DataType
+
+
+    def require_polars():
+        """Ensure polars is available before using polars helpers.
+
+        Returns:
+            None.
+        """
+        return None
+
 except ImportError:
     polars = None
+    PolarsDataFrame = DummyPolarsClass
+    PolarsSeries = DummyPolarsClass
+    PolarsExpr = DummyPolarsClass
+    PolarsDataFrame = DummyPolarsClass
+    PolarsField = DummyPolarsClass
+    PolarsSchema = DummyPolarsClass
+    PolarsDataType = DummyPolarsClass
+
+
+    def require_polars():
+        """Ensure polars is available before using polars helpers.
+
+        Returns:
+            None.
+        """
+        import polars

-    class PolarsDataFrame:
-        pass

 __all__ = [
     "polars",
     "require_polars",
-    "PolarsDataFrame"
+    "PolarsDataFrame",
+    "PolarsSeries",
+    "PolarsExpr",
+    "PolarsDataFrame",
+    "PolarsField",
+    "PolarsSchema",
+    "PolarsDataType",
+    "DummyPolarsClass"
 ]


-def require_polars():
-    """Ensure polars is available before using polars helpers.

-    Returns:
-        None.
-    """
-    if polars is None:
-        raise ImportError(
-            "polars is required to use this function. "
-            "Install it with `pip install polars`."
-        )
yggdrasil/pyutils/__init__.py CHANGED

@@ -1,8 +1,7 @@
 """Python utility helpers for retries, parallelism, and environment management."""

+from .mimetypes import *
 from .retry import retry
 from .parallel import parallelize
 from .python_env import PythonEnv
 from .callable_serde import CallableSerde
-
-__all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]