ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
- ygg-0.1.60.dist-info/RECORD +74 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +89 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +7 -2
- yggdrasil/databricks/compute/execution_context.py +465 -277
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +161 -173
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +331 -92
- yggdrasil/databricks/workspaces/io.py +92 -9
- yggdrasil/databricks/workspaces/path.py +120 -74
- yggdrasil/databricks/workspaces/workspace.py +212 -68
- yggdrasil/libs/databrickslib.py +23 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -0
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/mimetypes.py +0 -0
- yggdrasil/pyutils/python_env.py +13 -12
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/version.py +1 -1
- ygg-0.1.56.dist-info/RECORD +0 -68
- yggdrasil/databricks/ai/__init__.py +0 -1
- yggdrasil/databricks/ai/loki.py +0 -374
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/workspace.py
CHANGED

@@ -4,29 +4,40 @@ import dataclasses
 import logging
 import os
 import posixpath
+import time
 from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
+from threading import Thread
 from typing import (
     BinaryIO,
     Iterator,
     Optional,
-    Union, TYPE_CHECKING, List
+    Union, TYPE_CHECKING, List, Set
 )
 
-if TYPE_CHECKING:
-    from ..compute.cluster import Cluster
-
 from .path import DatabricksPath, DatabricksPathKind
+from ...libs.databrickslib import databricks_sdk, WorkspaceClient, DatabricksDummyClass
+from ...pyutils.expiring_dict import ExpiringDict
+from ...pyutils.waiting_config import WaitingConfig, WaitingConfigArg
 from ...version import __version__ as YGGDRASIL_VERSION
-from ...libs.databrickslib import require_databricks_sdk, databricks_sdk
 
 if databricks_sdk is not None:
-    from databricks.sdk import WorkspaceClient
     from databricks.sdk.errors import ResourceDoesNotExist
     from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
     from databricks.sdk.dbutils import FileInfo
     from databricks.sdk.service.files import DirectoryEntry
+else:
+    ResourceDoesNotExist = DatabricksDummyClass
+    ExportFormat = DatabricksDummyClass
+    ObjectInfo = DatabricksDummyClass
+    FileInfo = DatabricksDummyClass
+    DirectoryEntry = DatabricksDummyClass
+
+
+if TYPE_CHECKING:
+    from ..sql.warehouse import SQLWarehouse
+    from ..compute.cluster import Cluster
 
 
 __all__ = [
@@ -36,7 +47,26 @@ __all__ = [
 ]
 
 
-
+LOGGER = logging.getLogger(__name__)
+CHECKED_TMP_WORKSPACES: ExpiringDict[str, Set[str]] = ExpiringDict()
+
+def is_checked_tmp_path(
+    host: str,
+    base_path: str
+):
+    existing = CHECKED_TMP_WORKSPACES.get(host)
+
+    if existing is None:
+        CHECKED_TMP_WORKSPACES[host] = set(base_path)
+
+        return False
+
+    if base_path in existing:
+        return True
+
+    existing.add(base_path)
+
+    return False
 
 
 # ---------------------------------------------------------------------------
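For reference, the `is_checked_tmp_path` helper above memoizes, per workspace host, which temporary roots have already been swept; backing the memo with the new `ExpiringDict` means the record eventually lapses and cleanup runs again. A minimal standalone sketch of the same pattern, with a plain `dict` standing in for `ExpiringDict` (whose API is assumed here):

```python
from typing import Dict, Set

# Plain-dict stand-in for yggdrasil's ExpiringDict; entries never expire here.
_CHECKED: Dict[str, Set[str]] = {}


def is_checked(host: str, base_path: str) -> bool:
    """Return True if (host, base_path) was seen before; record it otherwise."""
    seen = _CHECKED.get(host)
    if seen is None:
        # A one-element set; note that the diff's set(base_path) would
        # instead build a set of the string's characters.
        _CHECKED[host] = {base_path}
        return False
    if base_path in seen:
        return True
    seen.add(base_path)
    return False


# The first call per (host, path) triggers a sweep; repeats are skipped:
assert is_checked("https://dbc.example.com", "/Workspace/Shared/.ygg/tmp") is False
assert is_checked("https://dbc.example.com", "/Workspace/Shared/.ygg/tmp") is True
```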
@@ -47,7 +77,7 @@ def _get_env_product():
 
     if not v:
         return "yggdrasil"
-    return v
+    return v
 
 
 def _get_env_product_version():
@@ -57,7 +87,7 @@ def _get_env_product_version():
         if _get_env_product() == "yggdrasil":
             return YGGDRASIL_VERSION
         return None
-    return v
+    return v
 
 
 def _get_env_product_tag():
@@ -65,7 +95,7 @@ def _get_env_product_tag():
 
     if not v:
         return None
-    return v
+    return v
 
 
 @dataclass
@@ -115,8 +145,12 @@ class Workspace:
     _cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
 
     # -------------------------
-    #
+    # Python methods
     # -------------------------
+    def __post_init__(self):
+        self.product = self.product.strip().lower() if self.product else "yggdrasil"
+        self.product_tag = self.product_tag.strip().lower() if self.product_tag else "main"
+
     def __getstate__(self):
        """Serialize the workspace state for pickling.
 
@@ -245,8 +279,6 @@ class Workspace:
 
         instance = self.clone_instance() if clone else self
 
-        require_databricks_sdk()
-
         # Build Config from config_dict if available, else from fields.
         kwargs = {
             "host": instance.host,
@@ -357,7 +389,7 @@ class Workspace:
     @property
     def safe_host(self):
         if not self.host:
-
+            self.host = self.sdk().config.host
         return self.host
 
     @property
@@ -392,7 +424,7 @@ class Workspace:
     # ------------------------------------------------------------------ #
     # Path helpers
     # ------------------------------------------------------------------ #
-    def
+    def filesystem(
         self,
         workspace: Optional["Workspace"] = None,
     ):
@@ -418,7 +450,8 @@ class Workspace:
         self,
         parts: Union[List[str], str],
         kind: Optional[DatabricksPathKind] = None,
-        workspace: Optional["Workspace"] = None
+        workspace: Optional["Workspace"] = None,
+        temporary: bool = False
     ):
         """Create a DatabricksPath in this workspace.
 
@@ -426,6 +459,7 @@ class Workspace:
             parts: Path parts or string to parse.
             kind: Optional path kind override.
             workspace: Optional workspace override.
+            temporary: Temporary path
 
         Returns:
             A DatabricksPath instance.
@@ -435,15 +469,141 @@ class Workspace:
         if kind is None or isinstance(parts, str):
             return DatabricksPath.parse(
                 obj=parts,
-                workspace=workspace
+                workspace=workspace,
+                temporary=temporary
             )
 
         return DatabricksPath(
             kind=kind,
             parts=parts,
+            temporary=temporary,
             _workspace=workspace
         )
 
+    @staticmethod
+    def _base_tmp_path(
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
+    ) -> str:
+        if catalog_name and schema_name:
+            base_path = "/Volumes/%s/%s/%s" % (
+                catalog_name, schema_name, volume_name or "tmp"
+            )
+        else:
+            base_path = "/Workspace/Shared/.ygg/tmp"
+
+        return base_path
+
+    def tmp_path(
+        self,
+        suffix: Optional[str] = None,
+        extension: Optional[str] = None,
+        max_lifetime: Optional[float] = None,
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
+        base_path: Optional[str] = None,
+    ) -> DatabricksPath:
+        """
+        Shared cache base under Volumes for the current user.
+
+        Args:
+            suffix: Optional suffix
+            extension: Optional extension suffix to append.
+            max_lifetime: Max lifetime of temporary path
+            catalog_name: Unity catalog name for volume path
+            schema_name: Unity schema name for volume path
+            volume_name: Unity volume name for volume path
+            base_path: Base temporary path
+
+        Returns:
+            A DatabricksPath pointing at the shared cache location.
+        """
+        start = int(time.time() * 1000)
+        max_lifetime = max_lifetime or 48.0 * 3600.0
+        end = int(start + max_lifetime)
+
+        base_path = base_path or self._base_tmp_path(
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            volume_name=volume_name
+        )
+
+        rnd = os.urandom(4).hex()
+        temp_path = f"tmp-{start}-{end}-{rnd}"
+
+        if suffix:
+            temp_path += suffix
+
+        if extension:
+            temp_path += ".%s" % extension
+
+        self.clean_tmp_folder(
+            raise_error=False,
+            wait=False,
+            base_path=base_path
+        )
+
+        return self.dbfs_path(f"{base_path}/{temp_path}")
+
+    def clean_tmp_folder(
+        self,
+        raise_error: bool = True,
+        wait: Optional[WaitingConfigArg] = True,
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
+        base_path: Optional[str] = None,
+    ):
+        wait = WaitingConfig.check_arg(wait)
+
+        base_path = base_path or self._base_tmp_path(
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            volume_name=volume_name
+        )
+
+        if is_checked_tmp_path(host=self.safe_host, base_path=base_path):
+            return self
+
+        if wait.timeout:
+            base_path = self.dbfs_path(base_path)
+
+            LOGGER.debug(
+                "Cleaning temp path %s",
+                base_path
+            )
+
+            try:
+                for path in base_path.ls(recursive=False, allow_not_found=True):
+                    parts = path.name.split("-")
+
+                    if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
+                        end = int(parts[2]) / 1000.0
+
+                        if end and time.time() > end:
+                            path.remove(recursive=True)
+            except Exception as e:
+                if raise_error:
+                    raise e
+                LOGGER.warning(e)
+
+            LOGGER.info(
+                "Cleaned temp path %s",
+                base_path
+            )
+        else:
+            Thread(
+                target=self.clean_tmp_folder,
+                kwargs={
+                    "raise_error": raise_error,
+                    "base_path": base_path
+                }
+            ).start()
+
+        return self
+
     def shared_cache_path(
         self,
         suffix: Optional[str] = None
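The `tmp_path`/`clean_tmp_folder` pair added above encodes each entry's lifetime into its directory name (`tmp-<start>-<end>-<random hex>`), so expiry can be decided from a plain listing. A self-contained sketch of that name round-trip, mirroring the fields used in the hunk; note that as written `start` is in epoch milliseconds while the default `max_lifetime` is in seconds, so the encoded deadline appears to land well before the nominal 48 hours:

```python
import os
import time


def make_tmp_name(max_lifetime: float = 48.0 * 3600.0) -> str:
    # Mirrors tmp_path: millisecond start stamp plus the lifetime offset.
    start = int(time.time() * 1000)
    end = int(start + max_lifetime)
    return f"tmp-{start}-{end}-{os.urandom(4).hex()}"


def is_expired(name: str) -> bool:
    # Mirrors clean_tmp_folder: the third dash-separated field is the
    # deadline, read back as milliseconds.
    parts = name.split("-")
    if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
        return time.time() > int(parts[2]) / 1000.0
    return False  # names outside the scheme are left alone


name = make_tmp_name()
print(name, is_expired(name))
```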
@@ -469,13 +629,13 @@ class Workspace:
     # SDK access / connection
     # ------------------------------------------------------------------ #
 
-    def sdk(self) ->
+    def sdk(self) -> WorkspaceClient:
         """Return the connected WorkspaceClient.
 
         Returns:
             The WorkspaceClient instance.
         """
-        return self.connect()._sdk
+        return self.connect(clone=False)._sdk
 
     # ------------------------------------------------------------------ #
     # List / open / delete / SQL
@@ -529,14 +689,15 @@ class Workspace:
                 yield from self.list_path(child_path, recursive=True)
             return
 
-
-
-
-
-
+        else:
+            # Workspace files / notebooks
+            try:
+                entries = list(sdk.workspace.list(path, recursive=recursive))
+            except ResourceDoesNotExist:
+                return
 
-
-
+            for obj in entries:
+                yield obj
 
     def open_path(
         self,
@@ -582,20 +743,29 @@ class Workspace:
         """
         return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None
 
-    def default_tags(self):
+    def default_tags(self, update: bool = True):
         """Return default resource tags for Databricks assets.
 
         Returns:
             A dict of default tags.
         """
-
-
-
-
-
-
-
-
+        if update:
+            base = {
+                k: v
+                for k, v in (
+                    ("Product", self.product),
+                )
+                if v
+            }
+        else:
+            base = {
+                k: v
+                for k, v in (
+                    ("Product", self.product),
+                    ("ProductTag", self.product_tag),
+                )
+                if v
+            }
 
         if self.custom_tags:
             base.update(self.custom_tags)
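The reworked `default_tags` builds its base mapping from whichever `(key, value)` pairs are truthy, then overlays `custom_tags`. The comprehension in isolation, with placeholder values standing in for the dataclass fields:

```python
product = "yggdrasil"
product_tag = ""  # falsy values are dropped by the `if v` clause
custom_tags = {"Team": "data-platform"}

base = {
    k: v
    for k, v in (
        ("Product", product),
        ("ProductTag", product_tag),
    )
    if v
}
base.update(custom_tags)
print(base)  # {'Product': 'yggdrasil', 'Team': 'data-platform'}
```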
@@ -605,7 +775,7 @@ class Workspace:
     def sql(
         self,
         workspace: Optional["Workspace"] = None,
-
+        warehouse: Optional["SQLWarehouse"] = None,
         catalog_name: Optional[str] = None,
         schema_name: Optional[str] = None,
     ):
@@ -613,7 +783,7 @@ class Workspace:
 
         Args:
             workspace: Optional workspace override.
-
+            warehouse: Optional SQL warehouse.
             catalog_name: Optional catalog name.
             schema_name: Optional schema name.
 
@@ -622,11 +792,13 @@ class Workspace:
         """
         from ..sql import SQLEngine
 
+        workspace = self if workspace is None else workspace
+
         return SQLEngine(
-            workspace=
-            warehouse_id=warehouse_id,
+            workspace=workspace,
             catalog_name=catalog_name,
             schema_name=schema_name,
+            _warehouse=warehouse,
         )
 
     def warehouses(
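`Workspace.sql()` now threads a `SQLWarehouse` object through to `SQLEngine` (via the private `_warehouse` field) instead of a raw `warehouse_id`. A hedged usage sketch; the host value is a placeholder and the warehouse lookup is assumed rather than taken from this diff:

```python
from yggdrasil.databricks.workspaces.workspace import Workspace

# Placeholder host; any field accepted by the Workspace dataclass works here.
ws = Workspace(host="https://dbc-example.cloud.databricks.com")

# Engine with default warehouse resolution:
engine = ws.sql(catalog_name="main", schema_name="default")

# Pinning a specific warehouse object (lookup API assumed, not shown in this diff):
# wh = ...  # a SQLWarehouse obtained via ws.warehouses(...)
# engine = ws.sql(warehouse=wh)
```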
@@ -667,25 +839,6 @@ class Workspace:
             cluster_name=cluster_name,
         )
 
-    def loki(
-        self,
-        workspace: Optional["Workspace"] = None,
-    ):
-        """
-        Return a Cluster helper bound to this workspace.
-
-        Args:
-            workspace: Optional workspace override.
-
-        Returns:
-            A Loki AI instance.
-        """
-        from ..ai.loki import Loki
-
-        return Loki(
-            workspace=self if workspace is None else workspace,
-        )
-
 # ---------------------------------------------------------------------------
 # Workspace-bound base class
 # ---------------------------------------------------------------------------
@@ -698,15 +851,6 @@ class WorkspaceService(ABC):
     """Base class for helpers that depend on a Workspace."""
     workspace: Workspace = dataclasses.field(default_factory=Workspace)
 
-    def __post_init__(self):
-        """Ensure a Workspace instance is available.
-
-        Returns:
-            None.
-        """
-        if self.workspace is None:
-            self.workspace = Workspace()
-
     def __enter__(self):
         """Enter a context manager and connect the workspace.
 
@@ -751,7 +895,7 @@ class WorkspaceService(ABC):
         parts: Union[List[str], str],
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
-    ):
+    ) -> "DatabricksPath":
         """Create a DatabricksPath in the underlying workspace.
 
         Args:
yggdrasil/libs/databrickslib.py
CHANGED

@@ -1,24 +1,13 @@
 """Optional Databricks SDK dependency helpers."""
+from yggdrasil.types.dummy_class import DummyModuleClass
 
-try:
-    import databricks
-    import databricks.sdk  # type: ignore
 
-
+class DatabricksDummyClass(DummyModuleClass):
+    """Placeholder object that raises if Databricks SDK is required."""
 
-
-
-
-class _DatabricksDummy:
-    """Placeholder object that raises if Databricks SDK is required."""
-    def __getattr__(self, item):
-        """Raise an error when accessing missing Databricks SDK attributes."""
-        require_databricks_sdk()
-
-databricks = _DatabricksDummy
-databricks_sdk = _DatabricksDummy
-
-WorkspaceClient = _DatabricksDummy
+    @classmethod
+    def module_name(cls) -> str:
+        return "databricks"
 
 
 def require_databricks_sdk():
@@ -34,9 +23,25 @@ def require_databricks_sdk():
     )
 
 
+try:
+    import databricks
+    import databricks.sdk  # type: ignore
+
+    from databricks.sdk import WorkspaceClient
+
+    databricks = databricks
+    databricks_sdk = databricks.sdk
+except ImportError:
+    databricks = DatabricksDummyClass
+    databricks_sdk = DatabricksDummyClass
+
+    WorkspaceClient = DatabricksDummyClass
+
+
 __all__ = [
     "databricks",
     "databricks_sdk",
     "require_databricks_sdk",
-    "WorkspaceClient"
+    "WorkspaceClient",
+    "DatabricksDummyClass"
 ]
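The shims in `databrickslib`, `pandaslib`, and `polarslib` now share one pattern: a `DummyModuleClass` subclass names its missing module, and that placeholder is bound wherever the real symbols would live. The new `yggdrasil/types/dummy_class.py` is not shown in this diff, so the base class below is an assumed sketch of the contract rather than the package's actual implementation:

```python
class DummyModuleClass:
    """Stand-in for a symbol from a missing optional dependency."""

    @classmethod
    def module_name(cls) -> str:
        raise NotImplementedError

    def __getattr__(self, item):
        # Any attribute access on the placeholder fails with an install hint.
        raise ImportError(
            f"Optional dependency {self.module_name()!r} is not installed "
            "but is required for this feature."
        )


class DatabricksDummyClass(DummyModuleClass):
    @classmethod
    def module_name(cls) -> str:
        return "databricks"


try:
    DatabricksDummyClass().whoami
except ImportError as exc:
    print(exc)
```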
yggdrasil/libs/extensions/spark_extensions.py
CHANGED

@@ -7,7 +7,6 @@ from typing import List, Union, Optional, Iterable, Callable, TYPE_CHECKING, Map
 
 import pyarrow as pa
 
-from ..pandaslib import pandas
 from ..sparklib import (
     pyspark,
     SparkDataFrame,
@@ -18,6 +17,7 @@ from ..sparklib import (
 from ...types.cast.registry import convert
 
 if TYPE_CHECKING:  # pragma: no cover
+    from ..pandaslib import pandas
     from ...types.cast.cast_options import CastOptions
 
 if pyspark is not None:
yggdrasil/libs/pandaslib.py
CHANGED

@@ -1,15 +1,22 @@
 """Optional pandas dependency helpers."""
 
+from ..types.dummy_class import DummyModuleClass
+
+class DummyPandasClass(DummyModuleClass):
+
+    @classmethod
+    def module_name(cls) -> str:
+        return "pandas"
+
+
 try:
     import pandas  # type: ignore
-    pandas = pandas
-
     PandasDataFrame = pandas.DataFrame
+    PandasSeries = pandas.Series
 except ImportError:
     pandas = None
-
-
-    pass
+    PandasDataFrame = DummyPandasClass
+    PandasSeries = DummyPandasClass
 
 
 def require_pandas():
@@ -28,5 +35,7 @@ def require_pandas():
 __all__ = [
     "pandas",
     "require_pandas",
-    "PandasDataFrame"
+    "PandasDataFrame",
+    "PandasSeries",
+    "DummyPandasClass",
 ]
yggdrasil/libs/polarslib.py
CHANGED

@@ -1,4 +1,13 @@
 """Optional Polars dependency helpers."""
+from yggdrasil.types.dummy_class import DummyModuleClass
+
+
+class DummyPolarsClass(DummyModuleClass):
+
+    @classmethod
+    def module_name(cls) -> str:
+        return "polars"
+
 
 try:
     import polars  # type: ignore
@@ -6,27 +15,54 @@ try:
     polars = polars
 
     PolarsDataFrame = polars.DataFrame
+    PolarsSeries = polars.Series
+    PolarsExpr = polars.Expr
+    PolarsDataFrame = polars.DataFrame
+    PolarsField = polars.Field
+    PolarsSchema = polars.Schema
+    PolarsDataType = polars.DataType
+
+
+    def require_polars():
+        """Ensure polars is available before using polars helpers.
+
+        Returns:
+            None.
+        """
+        return None
+
 except ImportError:
     polars = None
+    PolarsDataFrame = DummyPolarsClass
+    PolarsSeries = DummyPolarsClass
+    PolarsExpr = DummyPolarsClass
+    PolarsDataFrame = DummyPolarsClass
+    PolarsField = DummyPolarsClass
+    PolarsSchema = DummyPolarsClass
+    PolarsDataType = DummyPolarsClass
+
+
+    def require_polars():
+        """Ensure polars is available before using polars helpers.
+
+        Returns:
+            None.
+        """
+        import polars
 
-class PolarsDataFrame:
-    pass
 
 __all__ = [
     "polars",
     "require_polars",
-    "PolarsDataFrame"
+    "PolarsDataFrame",
+    "PolarsSeries",
+    "PolarsExpr",
+    "PolarsDataFrame",
+    "PolarsField",
+    "PolarsSchema",
+    "PolarsDataType",
+    "DummyPolarsClass"
 ]
 
 
-def require_polars():
-    """Ensure polars is available before using polars helpers.
 
-    Returns:
-        None.
-    """
-    if polars is None:
-        raise ImportError(
-            "polars is required to use this function. "
-            "Install it with `pip install polars`."
-        )