ygg 0.1.31__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
- ygg-0.1.32.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +241 -2
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +16 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +29 -4
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +147 -0
- yggdrasil/databricks/sql/types.py +33 -1
- yggdrasil/databricks/workspaces/__init__.py +2 -1
- yggdrasil/databricks/workspaces/filesytem.py +183 -0
- yggdrasil/databricks/workspaces/io.py +387 -9
- yggdrasil/databricks/workspaces/path.py +297 -2
- yggdrasil/databricks/workspaces/path_kind.py +3 -0
- yggdrasil/databricks/workspaces/workspace.py +202 -5
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +123 -1
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.31.dist-info/RECORD +0 -59
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Pandas <-> Arrow casting helpers and converters."""
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
import pyarrow as pa
|
|
@@ -33,18 +35,45 @@ if pandas is not None:
|
|
|
33
35
|
PandasDataFrame = pandas.DataFrame
|
|
34
36
|
|
|
35
37
|
def pandas_converter(*args, **kwargs):
|
|
38
|
+
"""Return a register_converter wrapper when pandas is available.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
*args: Converter registration args.
|
|
42
|
+
**kwargs: Converter registration kwargs.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Converter decorator.
|
|
46
|
+
"""
|
|
36
47
|
return register_converter(*args, **kwargs)
|
|
37
48
|
|
|
38
49
|
else:
|
|
39
50
|
# Dummy types so annotations/decorators don't explode without pandas
|
|
40
51
|
class _PandasDummy: # pragma: no cover - only used when pandas not installed
|
|
52
|
+
"""Placeholder type for pandas symbols when pandas is unavailable."""
|
|
41
53
|
pass
|
|
42
54
|
|
|
43
55
|
PandasSeries = _PandasDummy
|
|
44
56
|
PandasDataFrame = _PandasDummy
|
|
45
57
|
|
|
46
58
|
def pandas_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
|
|
59
|
+
"""Return a no-op decorator when pandas is unavailable.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
*_args: Ignored positional args.
|
|
63
|
+
**_kwargs: Ignored keyword args.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
No-op decorator.
|
|
67
|
+
"""
|
|
47
68
|
def _decorator(func):
|
|
69
|
+
"""Return the function unchanged.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
func: Callable to return.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
Unchanged callable.
|
|
76
|
+
"""
|
|
48
77
|
return func
|
|
49
78
|
|
|
50
79
|
return _decorator
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Polars <-> Arrow casting helpers and converters."""
|
|
2
|
+
|
|
1
3
|
from typing import Optional, Tuple, Union, Dict, Any
|
|
2
4
|
|
|
3
5
|
import pyarrow as pa
|
|
@@ -79,12 +81,22 @@ if polars is not None:
|
|
|
79
81
|
}
|
|
80
82
|
|
|
81
83
|
def polars_converter(*args, **kwargs):
|
|
84
|
+
"""Return a register_converter wrapper when polars is available.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
*args: Converter registration args.
|
|
88
|
+
**kwargs: Converter registration kwargs.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
Converter decorator.
|
|
92
|
+
"""
|
|
82
93
|
return register_converter(*args, **kwargs)
|
|
83
94
|
else:
|
|
84
95
|
ARROW_TO_POLARS = {}
|
|
85
96
|
|
|
86
97
|
# Dummy types so annotations/decorators don't explode without Polars
|
|
87
98
|
class _PolarsDummy: # pragma: no cover - only used when Polars not installed
|
|
99
|
+
"""Placeholder type for polars symbols when polars is unavailable."""
|
|
88
100
|
pass
|
|
89
101
|
|
|
90
102
|
PolarsSeries = _PolarsDummy
|
|
@@ -95,7 +107,24 @@ else:
|
|
|
95
107
|
PolarsDataType = _PolarsDummy
|
|
96
108
|
|
|
97
109
|
def polars_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
|
|
110
|
+
"""Return a no-op decorator when polars is unavailable.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
*_args: Ignored positional args.
|
|
114
|
+
**_kwargs: Ignored keyword args.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
No-op decorator.
|
|
118
|
+
"""
|
|
98
119
|
def _decorator(func):
|
|
120
|
+
"""Return the function unchanged.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
func: Callable to return.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
Unchanged callable.
|
|
127
|
+
"""
|
|
99
128
|
return func
|
|
100
129
|
return _decorator
|
|
101
130
|
|
|
@@ -171,6 +200,15 @@ def cast_to_list_array(
|
|
|
171
200
|
array: PolarsSeries,
|
|
172
201
|
options: Optional["CastOptions"] = None,
|
|
173
202
|
) -> PolarsSeries:
|
|
203
|
+
"""Cast a Polars list series to a target list Arrow type.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
array: Polars Series with list dtype.
|
|
207
|
+
options: Optional cast options.
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
Casted Polars Series.
|
|
211
|
+
"""
|
|
174
212
|
options = CastOptions.check_arg(options)
|
|
175
213
|
|
|
176
214
|
if not options.need_polars_type_cast(source_obj=array):
|
|
@@ -796,6 +834,15 @@ def polars_array_to_arrow_field(
|
|
|
796
834
|
array: Union[PolarsSeries, PolarsExpr],
|
|
797
835
|
options: Optional[CastOptions] = None
|
|
798
836
|
) -> pa.Field:
|
|
837
|
+
"""Infer an Arrow field from a Polars Series or Expr.
|
|
838
|
+
|
|
839
|
+
Args:
|
|
840
|
+
array: Polars Series or Expr.
|
|
841
|
+
options: Optional cast options.
|
|
842
|
+
|
|
843
|
+
Returns:
|
|
844
|
+
Arrow field.
|
|
845
|
+
"""
|
|
799
846
|
options = CastOptions.check_arg(options)
|
|
800
847
|
|
|
801
848
|
if options.source_arrow_field:
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Polars <-> pandas conversion helpers via Arrow."""
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
from .arrow_cast import CastOptions
|
|
@@ -35,18 +37,45 @@ if polars is not None and pandas is not None:
|
|
|
35
37
|
PandasDataFrame = pandas.DataFrame
|
|
36
38
|
|
|
37
39
|
def polars_pandas_converter(*args, **kwargs):
|
|
40
|
+
"""Return a register_converter wrapper when both libs are available.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
*args: Converter registration args.
|
|
44
|
+
**kwargs: Converter registration kwargs.
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Converter decorator.
|
|
48
|
+
"""
|
|
38
49
|
return register_converter(*args, **kwargs)
|
|
39
50
|
|
|
40
51
|
else:
|
|
41
52
|
# Dummy stand-ins so decorators/annotations don't explode if one lib is absent
|
|
42
53
|
class _Dummy: # pragma: no cover - only used when Polars or pandas not installed
|
|
54
|
+
"""Placeholder type when Polars or pandas are unavailable."""
|
|
43
55
|
pass
|
|
44
56
|
|
|
45
57
|
PolarsDataFrame = _Dummy
|
|
46
58
|
PandasDataFrame = _Dummy
|
|
47
59
|
|
|
48
60
|
def polars_pandas_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
|
|
61
|
+
"""Return a no-op decorator when dependencies are missing.
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
*_args: Ignored positional args.
|
|
65
|
+
**_kwargs: Ignored keyword args.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
No-op decorator.
|
|
69
|
+
"""
|
|
49
70
|
def _decorator(func):
|
|
71
|
+
"""Return the function unchanged.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
func: Callable to return.
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
Unchanged callable.
|
|
78
|
+
"""
|
|
50
79
|
return func
|
|
51
80
|
|
|
52
81
|
return _decorator
|
yggdrasil/types/cast/registry.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Type conversion registry and default converters."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
5
|
import dataclasses as _dataclasses
|
|
@@ -31,6 +33,15 @@ __all__ = [
|
|
|
31
33
|
|
|
32
34
|
|
|
33
35
|
def _identity(x, opt):
|
|
36
|
+
"""Return the input value unchanged.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
x: Value to return.
|
|
40
|
+
opt: Unused options parameter.
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
The input value.
|
|
44
|
+
"""
|
|
34
45
|
return x
|
|
35
46
|
|
|
36
47
|
ReturnType = TypeVar("ReturnType")
|
|
@@ -49,6 +60,14 @@ def register_converter(
|
|
|
49
60
|
"""
|
|
50
61
|
|
|
51
62
|
def decorator(func: Callable[..., ReturnType]) -> Converter:
|
|
63
|
+
"""Validate and register a converter function.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
func: Converter function to register.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Registered converter.
|
|
70
|
+
"""
|
|
52
71
|
sig = inspect.signature(func)
|
|
53
72
|
params = list(sig.parameters.values())
|
|
54
73
|
if any(
|
|
@@ -75,6 +94,14 @@ def register_converter(
|
|
|
75
94
|
|
|
76
95
|
|
|
77
96
|
def _unwrap_optional(hint: Any) -> Tuple[bool, Any]:
|
|
97
|
+
"""Return whether a hint is Optional and the underlying hint.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
hint: Type hint to inspect.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
Tuple of (is_optional, base_hint).
|
|
104
|
+
"""
|
|
78
105
|
origin = get_origin(hint)
|
|
79
106
|
if origin in {Union, types.UnionType}:
|
|
80
107
|
args = get_args(hint)
|
|
@@ -114,6 +141,15 @@ def find_converter(
|
|
|
114
141
|
from_type: Any,
|
|
115
142
|
to_hint: Any
|
|
116
143
|
) -> Optional[Converter]:
|
|
144
|
+
"""Find a registered converter for the requested type pair.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
from_type: Source type.
|
|
148
|
+
to_hint: Target type hint.
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
Converter function or None.
|
|
152
|
+
"""
|
|
117
153
|
|
|
118
154
|
# 0) Fast path: exact key
|
|
119
155
|
conv = _registry.get((from_type, to_hint))
|
|
@@ -177,6 +213,17 @@ def find_converter(
|
|
|
177
213
|
|
|
178
214
|
# Build composite converter once we find the first viable chain.
|
|
179
215
|
def composed(value, options=None, _c1=conv1, _c2=conv2):
|
|
216
|
+
"""Compose two converters into one.
|
|
217
|
+
|
|
218
|
+
Args:
|
|
219
|
+
value: Value to convert.
|
|
220
|
+
options: Cast options.
|
|
221
|
+
_c1: First converter.
|
|
222
|
+
_c2: Second converter.
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
Converted value.
|
|
226
|
+
"""
|
|
180
227
|
intermediate = _c1(value, options)
|
|
181
228
|
return _c2(intermediate, options)
|
|
182
229
|
|
|
@@ -190,6 +237,14 @@ def find_converter(
|
|
|
190
237
|
|
|
191
238
|
|
|
192
239
|
def _normalize_fractional_seconds(value: str) -> str:
|
|
240
|
+
"""Normalize fractional seconds to microsecond precision.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
value: Datetime string to normalize.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
Normalized datetime string.
|
|
247
|
+
"""
|
|
193
248
|
match = re.search(r"(\.)(\d+)(?=(?:[+-]\d{2}:?\d{2})?$)", value)
|
|
194
249
|
if not match:
|
|
195
250
|
return value
|
|
@@ -201,6 +256,14 @@ def _normalize_fractional_seconds(value: str) -> str:
|
|
|
201
256
|
|
|
202
257
|
|
|
203
258
|
def is_runtime_value(x) -> bool:
|
|
259
|
+
"""Return True when x is a runtime value, not a type hint.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
x: Value or type hint to inspect.
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
True if runtime value.
|
|
266
|
+
"""
|
|
204
267
|
# True for "42", [], MyClass(), etc.
|
|
205
268
|
# False for MyClass, list[int], dict[str, int], etc.
|
|
206
269
|
if inspect.isclass(x):
|
|
@@ -305,6 +368,16 @@ def convert_to_python_enum(
|
|
|
305
368
|
target_hint: type,
|
|
306
369
|
options: Optional[CastOptions] = None,
|
|
307
370
|
):
|
|
371
|
+
"""Convert values into a Python Enum member.
|
|
372
|
+
|
|
373
|
+
Args:
|
|
374
|
+
value: Value to convert.
|
|
375
|
+
target_hint: Enum type.
|
|
376
|
+
options: Optional cast options.
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
Enum member.
|
|
380
|
+
"""
|
|
308
381
|
if isinstance(value, target_hint):
|
|
309
382
|
return value
|
|
310
383
|
|
|
@@ -343,6 +416,16 @@ def convert_to_python_dataclass(
|
|
|
343
416
|
target_hint: type,
|
|
344
417
|
options: Optional[CastOptions] = None,
|
|
345
418
|
):
|
|
419
|
+
"""Convert a mapping into a dataclass instance.
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
value: Mapping of field values.
|
|
423
|
+
target_hint: Dataclass type.
|
|
424
|
+
options: Optional cast options.
|
|
425
|
+
|
|
426
|
+
Returns:
|
|
427
|
+
Dataclass instance.
|
|
428
|
+
"""
|
|
346
429
|
from yggdrasil.types.python_defaults import default_scalar
|
|
347
430
|
|
|
348
431
|
if isinstance(value, target_hint):
|
|
@@ -385,6 +468,18 @@ def convert_to_python_iterable(
|
|
|
385
468
|
target_args,
|
|
386
469
|
options: Optional[CastOptions] = None,
|
|
387
470
|
):
|
|
471
|
+
"""Convert iterable-like values into typed Python collections.
|
|
472
|
+
|
|
473
|
+
Args:
|
|
474
|
+
value: Iterable-like input value.
|
|
475
|
+
target_hint: Target type hint.
|
|
476
|
+
target_origin: Target container origin type.
|
|
477
|
+
target_args: Target type arguments.
|
|
478
|
+
options: Optional cast options.
|
|
479
|
+
|
|
480
|
+
Returns:
|
|
481
|
+
Converted iterable container.
|
|
482
|
+
"""
|
|
388
483
|
if isinstance(value, (str, bytes)):
|
|
389
484
|
raise TypeError(f"No converter registered for {type(value)} -> {target_hint}")
|
|
390
485
|
|
|
@@ -411,6 +506,15 @@ def convert_to_python_iterable(
|
|
|
411
506
|
|
|
412
507
|
@register_converter(str, int)
|
|
413
508
|
def _str_to_int(value: str, cast_options: Any) -> int:
|
|
509
|
+
"""Convert a string into an integer.
|
|
510
|
+
|
|
511
|
+
Args:
|
|
512
|
+
value: String to convert.
|
|
513
|
+
cast_options: Cast options.
|
|
514
|
+
|
|
515
|
+
Returns:
|
|
516
|
+
Integer value.
|
|
517
|
+
"""
|
|
414
518
|
if value == "":
|
|
415
519
|
return 0
|
|
416
520
|
return int(value)
|
|
@@ -418,6 +522,15 @@ def _str_to_int(value: str, cast_options: Any) -> int:
|
|
|
418
522
|
|
|
419
523
|
@register_converter(str, float)
|
|
420
524
|
def _str_to_float(value: str, cast_options: Any) -> float:
|
|
525
|
+
"""Convert a string into a float.
|
|
526
|
+
|
|
527
|
+
Args:
|
|
528
|
+
value: String to convert.
|
|
529
|
+
cast_options: Cast options.
|
|
530
|
+
|
|
531
|
+
Returns:
|
|
532
|
+
Float value.
|
|
533
|
+
"""
|
|
421
534
|
default_value = getattr(cast_options, "default_value", None)
|
|
422
535
|
if value == "" and default_value is not None:
|
|
423
536
|
return default_value
|
|
@@ -426,6 +539,15 @@ def _str_to_float(value: str, cast_options: Any) -> float:
|
|
|
426
539
|
|
|
427
540
|
@register_converter(str, bool)
|
|
428
541
|
def _str_to_bool(value: str, cast_options: Any) -> bool:
|
|
542
|
+
"""Convert a string into a boolean.
|
|
543
|
+
|
|
544
|
+
Args:
|
|
545
|
+
value: String to convert.
|
|
546
|
+
cast_options: Cast options.
|
|
547
|
+
|
|
548
|
+
Returns:
|
|
549
|
+
Boolean value.
|
|
550
|
+
"""
|
|
429
551
|
default_value = getattr(cast_options, "default_value", None)
|
|
430
552
|
if value == "" and default_value is not None:
|
|
431
553
|
return default_value
|
|
@@ -441,11 +563,29 @@ def _str_to_bool(value: str, cast_options: Any) -> bool:
|
|
|
441
563
|
|
|
442
564
|
@register_converter(str, _datetime.date)
|
|
443
565
|
def _str_to_date(value: str, cast_options: Any) -> _datetime.date:
|
|
566
|
+
"""Convert a string into a date.
|
|
567
|
+
|
|
568
|
+
Args:
|
|
569
|
+
value: String to convert.
|
|
570
|
+
cast_options: Cast options.
|
|
571
|
+
|
|
572
|
+
Returns:
|
|
573
|
+
Date value.
|
|
574
|
+
"""
|
|
444
575
|
return _str_to_datetime(value, cast_options).date()
|
|
445
576
|
|
|
446
577
|
|
|
447
578
|
@register_converter(str, _datetime.datetime)
|
|
448
579
|
def _str_to_datetime(value: str, cast_options: Any) -> _datetime.datetime:
|
|
580
|
+
"""Convert a string into a datetime.
|
|
581
|
+
|
|
582
|
+
Args:
|
|
583
|
+
value: String to convert.
|
|
584
|
+
cast_options: Cast options.
|
|
585
|
+
|
|
586
|
+
Returns:
|
|
587
|
+
Datetime value.
|
|
588
|
+
"""
|
|
449
589
|
default_value = getattr(cast_options, "default_value", None)
|
|
450
590
|
if value == "" and default_value is not None:
|
|
451
591
|
return default_value
|
|
@@ -488,6 +628,15 @@ def _str_to_datetime(value: str, cast_options: Any) -> _datetime.datetime:
|
|
|
488
628
|
|
|
489
629
|
@register_converter(str, _datetime.timedelta)
|
|
490
630
|
def _str_to_timedelta(value: str, cast_options: Any) -> _datetime.timedelta:
|
|
631
|
+
"""Convert a string into a timedelta.
|
|
632
|
+
|
|
633
|
+
Args:
|
|
634
|
+
value: String to convert.
|
|
635
|
+
cast_options: Cast options.
|
|
636
|
+
|
|
637
|
+
Returns:
|
|
638
|
+
Timedelta value.
|
|
639
|
+
"""
|
|
491
640
|
default_value = getattr(cast_options, "default_value", None)
|
|
492
641
|
stripped = value.strip()
|
|
493
642
|
|
|
@@ -545,6 +694,15 @@ def _str_to_timedelta(value: str, cast_options: Any) -> _datetime.timedelta:
|
|
|
545
694
|
|
|
546
695
|
@register_converter(str, _datetime.time)
|
|
547
696
|
def _str_to_time(value: str, cast_options: Any) -> _datetime.time:
|
|
697
|
+
"""Convert a string into a time.
|
|
698
|
+
|
|
699
|
+
Args:
|
|
700
|
+
value: String to convert.
|
|
701
|
+
cast_options: Cast options.
|
|
702
|
+
|
|
703
|
+
Returns:
|
|
704
|
+
Time value.
|
|
705
|
+
"""
|
|
548
706
|
default_value = getattr(cast_options, "default_value", None)
|
|
549
707
|
if value == "" and default_value is not None:
|
|
550
708
|
return default_value
|
|
@@ -553,9 +711,27 @@ def _str_to_time(value: str, cast_options: Any) -> _datetime.time:
|
|
|
553
711
|
|
|
554
712
|
@register_converter(_datetime.datetime, _datetime.date)
|
|
555
713
|
def _datetime_to_date(value: _datetime.datetime, cast_options: Any) -> _datetime.date:
|
|
714
|
+
"""Convert a datetime into a date.
|
|
715
|
+
|
|
716
|
+
Args:
|
|
717
|
+
value: Datetime value.
|
|
718
|
+
cast_options: Cast options.
|
|
719
|
+
|
|
720
|
+
Returns:
|
|
721
|
+
Date value.
|
|
722
|
+
"""
|
|
556
723
|
return value.date()
|
|
557
724
|
|
|
558
725
|
|
|
559
726
|
@register_converter(int, str)
|
|
560
727
|
def _int_to_str(value: int, cast_options: Any) -> str:
|
|
728
|
+
"""Convert an integer into a string.
|
|
729
|
+
|
|
730
|
+
Args:
|
|
731
|
+
value: Integer to convert.
|
|
732
|
+
cast_options: Cast options.
|
|
733
|
+
|
|
734
|
+
Returns:
|
|
735
|
+
String value.
|
|
736
|
+
"""
|
|
561
737
|
return str(value)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Spark <-> Arrow casting helpers and converters."""
|
|
2
|
+
|
|
1
3
|
from typing import Optional, Tuple, List
|
|
2
4
|
|
|
3
5
|
import pyarrow as pa
|
|
@@ -57,10 +59,20 @@ if pyspark is not None:
|
|
|
57
59
|
SparkStructField = T.StructField
|
|
58
60
|
|
|
59
61
|
def spark_converter(*args, **kwargs):
|
|
62
|
+
"""Return a register_converter wrapper when pyspark is available.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
*args: Converter registration args.
|
|
66
|
+
**kwargs: Converter registration kwargs.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Converter decorator.
|
|
70
|
+
"""
|
|
60
71
|
return register_converter(*args, **kwargs)
|
|
61
72
|
|
|
62
73
|
else: # pyspark missing -> dummies + no-op decorator
|
|
63
74
|
class _SparkDummy: # pragma: no cover
|
|
75
|
+
"""Placeholder type for Spark symbols when pyspark is unavailable."""
|
|
64
76
|
pass
|
|
65
77
|
|
|
66
78
|
SparkDataFrame = _SparkDummy
|
|
@@ -70,7 +82,24 @@ else: # pyspark missing -> dummies + no-op decorator
|
|
|
70
82
|
SparkStructField = _SparkDummy
|
|
71
83
|
|
|
72
84
|
def spark_converter(*_args, **_kwargs): # pragma: no cover
|
|
85
|
+
"""Return a no-op decorator when pyspark is unavailable.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
*_args: Ignored positional args.
|
|
89
|
+
**_kwargs: Ignored keyword args.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
No-op decorator.
|
|
93
|
+
"""
|
|
73
94
|
def _decorator(func):
|
|
95
|
+
"""Return the function unchanged.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
func: Callable to return.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
Unchanged callable.
|
|
102
|
+
"""
|
|
74
103
|
return func
|
|
75
104
|
|
|
76
105
|
return _decorator
|
|
@@ -227,6 +256,17 @@ def check_column_nullability(
|
|
|
227
256
|
target_field: "T.StructField",
|
|
228
257
|
mask: "pyspark.sql.Column"
|
|
229
258
|
) -> "pyspark.sql.Column":
|
|
259
|
+
"""Fill nulls when the target field is non-nullable.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
column: Spark column to adjust.
|
|
263
|
+
source_field: Source Spark field.
|
|
264
|
+
target_field: Target Spark field.
|
|
265
|
+
mask: Null mask column.
|
|
266
|
+
|
|
267
|
+
Returns:
|
|
268
|
+
Updated Spark column.
|
|
269
|
+
"""
|
|
230
270
|
source_nullable = True if source_field is None else source_field.nullable
|
|
231
271
|
target_nullable = True if target_field is None else target_field.nullable
|
|
232
272
|
|
|
@@ -532,6 +572,15 @@ def spark_dataframe_to_spark_type(
|
|
|
532
572
|
df: SparkDataFrame,
|
|
533
573
|
options: Optional[CastOptions] = None,
|
|
534
574
|
) -> pa.DataType:
|
|
575
|
+
"""Return the Spark DataFrame schema as a Spark data type.
|
|
576
|
+
|
|
577
|
+
Args:
|
|
578
|
+
df: Spark DataFrame.
|
|
579
|
+
options: Optional cast options.
|
|
580
|
+
|
|
581
|
+
Returns:
|
|
582
|
+
Spark DataType.
|
|
583
|
+
"""
|
|
535
584
|
return df.schema
|
|
536
585
|
|
|
537
586
|
|
|
@@ -540,6 +589,15 @@ def spark_dataframe_to_spark_field(
|
|
|
540
589
|
df: SparkDataFrame,
|
|
541
590
|
options: Optional[CastOptions] = None,
|
|
542
591
|
) -> pa.DataType:
|
|
592
|
+
"""Return a Spark StructField for the DataFrame schema.
|
|
593
|
+
|
|
594
|
+
Args:
|
|
595
|
+
df: Spark DataFrame.
|
|
596
|
+
options: Optional cast options.
|
|
597
|
+
|
|
598
|
+
Returns:
|
|
599
|
+
Spark StructField.
|
|
600
|
+
"""
|
|
543
601
|
return SparkStructField(
|
|
544
602
|
df.getAlias() or "root",
|
|
545
603
|
df.schema,
|
|
@@ -552,6 +610,15 @@ def spark_dataframe_to_arrow_field(
|
|
|
552
610
|
df: SparkDataFrame,
|
|
553
611
|
options: Optional[CastOptions] = None,
|
|
554
612
|
) -> pa.DataType:
|
|
613
|
+
"""Return an Arrow field representation of the DataFrame schema.
|
|
614
|
+
|
|
615
|
+
Args:
|
|
616
|
+
df: Spark DataFrame.
|
|
617
|
+
options: Optional cast options.
|
|
618
|
+
|
|
619
|
+
Returns:
|
|
620
|
+
Arrow field.
|
|
621
|
+
"""
|
|
555
622
|
return spark_field_to_arrow_field(
|
|
556
623
|
spark_dataframe_to_spark_field(df, options),
|
|
557
624
|
options
|
|
@@ -563,6 +630,15 @@ def spark_dataframe_to_arrow_schema(
|
|
|
563
630
|
df: SparkDataFrame,
|
|
564
631
|
options: Optional[CastOptions] = None,
|
|
565
632
|
) -> pa.DataType:
|
|
633
|
+
"""Return an Arrow schema representation of the DataFrame.
|
|
634
|
+
|
|
635
|
+
Args:
|
|
636
|
+
df: Spark DataFrame.
|
|
637
|
+
options: Optional cast options.
|
|
638
|
+
|
|
639
|
+
Returns:
|
|
640
|
+
Arrow schema.
|
|
641
|
+
"""
|
|
566
642
|
return arrow_field_to_schema(
|
|
567
643
|
spark_field_to_arrow_field(
|
|
568
644
|
spark_dataframe_to_spark_field(df, options),
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Spark <-> pandas conversion helpers via Arrow."""
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
from .arrow_cast import CastOptions
|
|
@@ -34,18 +36,45 @@ if pyspark is not None and pandas is not None:
|
|
|
34
36
|
PandasDataFrame = pandas.DataFrame
|
|
35
37
|
|
|
36
38
|
def spark_pandas_converter(*args, **kwargs):
|
|
39
|
+
"""Return a register_converter wrapper when dependencies are available.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
*args: Converter registration args.
|
|
43
|
+
**kwargs: Converter registration kwargs.
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
Converter decorator.
|
|
47
|
+
"""
|
|
37
48
|
return register_converter(*args, **kwargs)
|
|
38
49
|
|
|
39
50
|
else:
|
|
40
51
|
# Dummy stand-ins so decorators/annotations don't explode if one lib is absent
|
|
41
52
|
class _Dummy: # pragma: no cover - only used when Spark or pandas not installed
|
|
53
|
+
"""Placeholder type when Spark or pandas are unavailable."""
|
|
42
54
|
pass
|
|
43
55
|
|
|
44
56
|
SparkDataFrame = _Dummy
|
|
45
57
|
PandasDataFrame = _Dummy
|
|
46
58
|
|
|
47
59
|
def spark_pandas_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
|
|
60
|
+
"""Return a no-op decorator when dependencies are missing.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
*_args: Ignored positional args.
|
|
64
|
+
**_kwargs: Ignored keyword args.
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
No-op decorator.
|
|
68
|
+
"""
|
|
48
69
|
def _decorator(func):
|
|
70
|
+
"""Return the function unchanged.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
func: Callable to return.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
Unchanged callable.
|
|
77
|
+
"""
|
|
49
78
|
return func
|
|
50
79
|
|
|
51
80
|
return _decorator
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Spark <-> Polars conversion helpers via Arrow."""
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
import pyarrow as pa
|
|
@@ -23,10 +25,36 @@ __all__ = [
|
|
|
23
25
|
# ---------------------------------------------------------------------------
|
|
24
26
|
if pyspark is not None and polars is not None:
|
|
25
27
|
def spark_polars_converter(*args, **kwargs):
|
|
28
|
+
"""Return a register_converter wrapper when deps are available.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
*args: Converter registration args.
|
|
32
|
+
**kwargs: Converter registration kwargs.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
Converter decorator.
|
|
36
|
+
"""
|
|
26
37
|
return register_converter(*args, **kwargs)
|
|
27
38
|
else:
|
|
28
39
|
def spark_polars_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
|
|
40
|
+
"""Return a no-op decorator when deps are missing.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
*_args: Ignored positional args.
|
|
44
|
+
**_kwargs: Ignored keyword args.
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
No-op decorator.
|
|
48
|
+
"""
|
|
29
49
|
def _decorator(func):
|
|
50
|
+
"""Return the function unchanged.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
func: Callable to return.
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
Unchanged callable.
|
|
57
|
+
"""
|
|
30
58
|
return func
|
|
31
59
|
|
|
32
60
|
return _decorator
|