ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
- ygg-0.1.32.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +241 -2
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +16 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +31 -34
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +148 -1
- yggdrasil/databricks/sql/types.py +49 -1
- yggdrasil/databricks/workspaces/__init__.py +4 -1
- yggdrasil/databricks/workspaces/filesytem.py +344 -0
- yggdrasil/databricks/workspaces/io.py +1123 -0
- yggdrasil/databricks/workspaces/path.py +1415 -0
- yggdrasil/databricks/workspaces/path_kind.py +13 -0
- yggdrasil/databricks/workspaces/workspace.py +298 -154
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +131 -0
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.30.dist-info/RECORD +0 -56
- yggdrasil/databricks/workspaces/databricks_path.py +0 -784
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/types/cast/spark_cast.py
CHANGED

@@ -1,3 +1,5 @@
+"""Spark <-> Arrow casting helpers and converters."""
+
 from typing import Optional, Tuple, List
 
 import pyarrow as pa

@@ -57,10 +59,20 @@ if pyspark is not None:
     SparkStructField = T.StructField
 
     def spark_converter(*args, **kwargs):
+        """Return a register_converter wrapper when pyspark is available.
+
+        Args:
+            *args: Converter registration args.
+            **kwargs: Converter registration kwargs.
+
+        Returns:
+            Converter decorator.
+        """
         return register_converter(*args, **kwargs)
 
 else:  # pyspark missing -> dummies + no-op decorator
     class _SparkDummy:  # pragma: no cover
+        """Placeholder type for Spark symbols when pyspark is unavailable."""
         pass
 
     SparkDataFrame = _SparkDummy

@@ -70,7 +82,24 @@ else:  # pyspark missing -> dummies + no-op decorator
     SparkStructField = _SparkDummy
 
     def spark_converter(*_args, **_kwargs):  # pragma: no cover
+        """Return a no-op decorator when pyspark is unavailable.
+
+        Args:
+            *_args: Ignored positional args.
+            **_kwargs: Ignored keyword args.
+
+        Returns:
+            No-op decorator.
+        """
         def _decorator(func):
+            """Return the function unchanged.
+
+            Args:
+                func: Callable to return.
+
+            Returns:
+                Unchanged callable.
+            """
             return func
 
         return _decorator

@@ -227,6 +256,17 @@ def check_column_nullability(
     target_field: "T.StructField",
     mask: "pyspark.sql.Column"
 ) -> "pyspark.sql.Column":
+    """Fill nulls when the target field is non-nullable.
+
+    Args:
+        column: Spark column to adjust.
+        source_field: Source Spark field.
+        target_field: Target Spark field.
+        mask: Null mask column.
+
+    Returns:
+        Updated Spark column.
+    """
     source_nullable = True if source_field is None else source_field.nullable
     target_nullable = True if target_field is None else target_field.nullable
 

@@ -532,6 +572,15 @@ def spark_dataframe_to_spark_type(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return the Spark DataFrame schema as a Spark data type.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Spark DataType.
+    """
     return df.schema
 
 

@@ -540,6 +589,15 @@ def spark_dataframe_to_spark_field(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return a Spark StructField for the DataFrame schema.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Spark StructField.
+    """
     return SparkStructField(
         df.getAlias() or "root",
         df.schema,

@@ -552,6 +610,15 @@ def spark_dataframe_to_arrow_field(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return an Arrow field representation of the DataFrame schema.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Arrow field.
+    """
     return spark_field_to_arrow_field(
         spark_dataframe_to_spark_field(df, options),
         options

@@ -563,6 +630,15 @@ def spark_dataframe_to_arrow_schema(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return an Arrow schema representation of the DataFrame.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Arrow schema.
+    """
     return arrow_field_to_schema(
         spark_field_to_arrow_field(
             spark_dataframe_to_spark_field(df, options),
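The last three hunks compose a pipeline: the DataFrame schema is wrapped into a root StructField, converted to an Arrow field, and finally unwrapped into an Arrow schema. A minimal sketch of that final unwrapping step, assuming the package's arrow_field_to_schema flattens a struct-typed field into a schema of its children (only the function name comes from the diff; the body here is a guess):

import pyarrow as pa

def arrow_field_to_schema_sketch(field: pa.Field) -> pa.Schema:
    # A struct-typed field can be flattened into a schema of its children;
    # any other field becomes a single-column schema.
    if pa.types.is_struct(field.type):
        return pa.schema(list(field.type))
    return pa.schema([field])

root = pa.field("root", pa.struct([pa.field("id", pa.int64()),
                                   pa.field("name", pa.string())]))
print(arrow_field_to_schema_sketch(root))  # id: int64, name: string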
yggdrasil/types/cast/spark_pandas_cast.py
CHANGED

@@ -1,3 +1,5 @@
+"""Spark <-> pandas conversion helpers via Arrow."""
+
 from typing import Optional
 
 from .arrow_cast import CastOptions

@@ -34,18 +36,45 @@ if pyspark is not None and pandas is not None:
     PandasDataFrame = pandas.DataFrame
 
     def spark_pandas_converter(*args, **kwargs):
+        """Return a register_converter wrapper when dependencies are available.
+
+        Args:
+            *args: Converter registration args.
+            **kwargs: Converter registration kwargs.
+
+        Returns:
+            Converter decorator.
+        """
         return register_converter(*args, **kwargs)
 
 else:
     # Dummy stand-ins so decorators/annotations don't explode if one lib is absent
     class _Dummy:  # pragma: no cover - only used when Spark or pandas not installed
+        """Placeholder type when Spark or pandas are unavailable."""
        pass
 
     SparkDataFrame = _Dummy
     PandasDataFrame = _Dummy
 
     def spark_pandas_converter(*_args, **_kwargs):  # pragma: no cover - no-op decorator
+        """Return a no-op decorator when dependencies are missing.
+
+        Args:
+            *_args: Ignored positional args.
+            **_kwargs: Ignored keyword args.
+
+        Returns:
+            No-op decorator.
+        """
         def _decorator(func):
+            """Return the function unchanged.
+
+            Args:
+                func: Callable to return.
+
+            Returns:
+                Unchanged callable.
+            """
             return func
 
         return _decorator
yggdrasil/types/cast/spark_polars_cast.py
CHANGED

@@ -1,3 +1,5 @@
+"""Spark <-> Polars conversion helpers via Arrow."""
+
 from typing import Optional
 
 import pyarrow as pa

@@ -23,10 +25,36 @@ __all__ = [
 # ---------------------------------------------------------------------------
 if pyspark is not None and polars is not None:
     def spark_polars_converter(*args, **kwargs):
+        """Return a register_converter wrapper when deps are available.
+
+        Args:
+            *args: Converter registration args.
+            **kwargs: Converter registration kwargs.
+
+        Returns:
+            Converter decorator.
+        """
         return register_converter(*args, **kwargs)
 else:
     def spark_polars_converter(*_args, **_kwargs):  # pragma: no cover - no-op decorator
+        """Return a no-op decorator when deps are missing.
+
+        Args:
+            *_args: Ignored positional args.
+            **_kwargs: Ignored keyword args.
+
+        Returns:
+            No-op decorator.
+        """
         def _decorator(func):
+            """Return the function unchanged.
+
+            Args:
+                func: Callable to return.
+
+            Returns:
+                Unchanged callable.
+            """
             return func
 
         return _decorator
yggdrasil/types/libs.py
CHANGED
yggdrasil/types/python_arrow.py
CHANGED
@@ -1,3 +1,5 @@
+"""Arrow type inference utilities from Python type hints."""
+
 import dataclasses
 import datetime
 import decimal

@@ -57,6 +59,14 @@ _INT_UNITS_ORDER = {"s": 0, "ms": 1, "us": 2, "ns": 3}
 
 
 def _is_optional(hint) -> bool:
+    """Return True when the hint includes None.
+
+    Args:
+        hint: Type hint to inspect.
+
+    Returns:
+        True if Optional.
+    """
     origin = get_origin(hint)
 
     if origin is Annotated:

@@ -69,6 +79,14 @@ def _is_optional(hint) -> bool:
 
 
 def _strip_optional(hint):
+    """Return the underlying hint without Optional[...].
+
+    Args:
+        hint: Type hint to inspect.
+
+    Returns:
+        Hint without Optional wrapper.
+    """
     origin = get_origin(hint)
 
     if origin is Annotated:

@@ -89,6 +107,15 @@ def _strip_optional(hint):
 
 
 def _field_name(hint, index: int | None) -> str:
+    """Derive a field name from a hint and optional index.
+
+    Args:
+        hint: Type hint to inspect.
+        index: Optional positional index.
+
+    Returns:
+        Field name string.
+    """
     name = getattr(hint, "__name__", None)
 
     if name:

@@ -101,6 +128,14 @@ def _field_name(hint, index: int | None) -> str:
 
 
 def _struct_from_dataclass(hint) -> pa.StructType:
+    """Build an Arrow struct type from a dataclass.
+
+    Args:
+        hint: Dataclass type.
+
+    Returns:
+        Arrow StructType.
+    """
     fields = []
 
     for field in dataclasses.fields(hint):

@@ -113,6 +148,15 @@ def _struct_from_dataclass(hint) -> pa.StructType:
 
 
 def _struct_from_tuple(args, names: list[str] | None = None) -> pa.StructType:
+    """Build an Arrow struct type from tuple hints.
+
+    Args:
+        args: Tuple element type hints.
+        names: Optional field names.
+
+    Returns:
+        Arrow StructType.
+    """
     if names is not None and len(names) != len(args):
         raise TypeError("Tuple metadata names length must match tuple elements")
 

@@ -125,6 +169,15 @@ def _struct_from_tuple(args, names: list[str] | None = None) -> pa.StructType:
 
 
 def _arrow_type_from_metadata(base_hint, metadata):
+    """Resolve an Arrow type from Annotated metadata when present.
+
+    Args:
+        base_hint: Base Python type hint.
+        metadata: Annotated metadata sequence.
+
+    Returns:
+        Arrow DataType or None.
+    """
     merged_metadata: dict[str, Any] = {}
 
     for item in metadata:

@@ -187,6 +240,14 @@ def _arrow_type_from_metadata(base_hint, metadata):
 
 
 def _arrow_type_from_hint(hint):
+    """Infer an Arrow data type from a Python type hint.
+
+    Args:
+        hint: Python type hint.
+
+    Returns:
+        Arrow DataType.
+    """
     if get_origin(hint) is Annotated:
         base_hint, *metadata = get_args(hint)
         metadata_type = _arrow_type_from_metadata(base_hint, metadata)

@@ -229,6 +290,16 @@ def _arrow_type_from_hint(hint):
 
 
 def arrow_field_from_hint(hint, name: str | None = None, index: int | None = None) -> pa.Field:
+    """Build an Arrow field from a Python type hint.
+
+    Args:
+        hint: Python type hint.
+        name: Optional field name override.
+        index: Optional positional index.
+
+    Returns:
+        Arrow field.
+    """
     nullable = _is_optional(hint)
     base_hint = _strip_optional(hint) if nullable else hint
 
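For plain hints, arrow_field_from_hint's documented flow is: detect Optional to decide nullability, strip the wrapper, then map the base type to an Arrow type. A rough sketch for a few primitives (the type table is illustrative, not the package's full mapping):

import pyarrow as pa
from typing import Optional, Union, get_args, get_origin

_SIMPLE = {int: pa.int64(), float: pa.float64(), str: pa.string(), bool: pa.bool_()}

def field_from_hint_sketch(hint, name: str) -> pa.Field:
    args = get_args(hint)
    nullable = get_origin(hint) is Union and type(None) in args
    if nullable:
        # strip Optional[...] down to the single non-None member
        hint = next(a for a in args if a is not type(None))
    return pa.field(name, _SIMPLE[hint], nullable=nullable)

print(field_from_hint_sketch(Optional[int], "id"))  # id: int64, nullable
print(field_from_hint_sketch(str, "name"))          # name: string, not null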
@@ -269,6 +340,15 @@ def is_arrow_type_binary_like(arrow_type: pa.DataType) -> bool:
 
 
 def _merge_metadata(left: Optional[Dict[bytes, bytes]], right: Optional[Dict[bytes, bytes]]) -> Optional[Dict[bytes, bytes]]:
+    """Merge Arrow field metadata with right-hand precedence.
+
+    Args:
+        left: Left metadata mapping.
+        right: Right metadata mapping.
+
+    Returns:
+        Merged metadata mapping or None.
+    """
     if not left and not right:
         return None
     out: Dict[bytes, bytes] = {}

@@ -281,31 +361,87 @@ def _merge_metadata(left: Optional[Dict[bytes, bytes]], right: Optional[Dict[byt
 
 
 def _is_null(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is null.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if null type.
+    """
     return pa.types.is_null(dt)
 
 
 def _is_integer(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is integer-like.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if integer type.
+    """
     return pa.types.is_integer(dt)
 
 
 def _is_signed_integer(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is signed integer.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if signed integer.
+    """
     return pa.types.is_signed_integer(dt)
 
 
 def _is_unsigned_integer(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is unsigned integer.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if unsigned integer.
+    """
     return pa.types.is_unsigned_integer(dt)
 
 
 def _is_floating(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is floating-point.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if floating type.
+    """
     return pa.types.is_floating(dt)
 
 
 def _int_bit_width(dt: pa.DataType) -> int:
+    """Return the bit width of an integer Arrow type.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        Bit width.
+    """
     # int8/int16/int32/int64/uint8/...
     return dt.bit_width
 
 
 def _digits_for_uint_bits(bits: int) -> int:
+    """Return a safe decimal digit count for unsigned integer bits.
+
+    Args:
+        bits: Unsigned bit width.
+
+    Returns:
+        Decimal digit count.
+    """
     # max uint bits -> decimal digits upper bound:
     # uint64 max = 18446744073709551615 => 20 digits
     # 2**bits - 1 has ceil(bits*log10(2)) digits, use safe upper bound
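The digit bound in those comments checks out: 2**bits - 1 has ceil(bits * log10(2)) decimal digits, so an illustrative version of the helper (the package's exact body is not shown past the comments) is:

import math

def digits_for_uint_bits(bits: int) -> int:
    # safe decimal-digit upper bound for an unsigned integer of `bits` bits
    return math.ceil(bits * math.log10(2))

assert digits_for_uint_bits(64) == 20  # uint64 max = 18446744073709551615
assert digits_for_uint_bits(32) == 10  # uint32 max = 4294967295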
@@ -364,10 +500,27 @@ def _promote_int_types(left: pa.DataType, right: pa.DataType) -> pa.DataType:
 
 
 def _promote_decimal_types(left: pa.Decimal128Type | pa.Decimal256Type,
                            right: pa.Decimal128Type | pa.Decimal256Type) -> pa.DataType:
+    """Return a decimal type that can represent both inputs.
+
+    Args:
+        left: Left decimal type.
+        right: Right decimal type.
+
+    Returns:
+        Promoted decimal Arrow type.
+    """
     # Match scale, then set precision to fit both after scale alignment.
     scale = max(left.scale, right.scale)
 
     def adj_precision(d: pa.DataType) -> int:
+        """Adjust precision to account for scale differences.
+
+        Args:
+            d: Decimal Arrow type.
+
+        Returns:
+            Adjusted precision.
+        """
         # Increasing scale can require increasing precision to keep same integer digits.
         # integer_digits = precision - scale
         integer_digits = d.precision - d.scale

@@ -382,6 +535,15 @@ def _promote_decimal_types(left: pa.Decimal128Type | pa.Decimal256Type,
 
 
 def _promote_numeric(left: pa.DataType, right: pa.DataType) -> pa.DataType:
+    """Promote numeric Arrow types to a common compatible type.
+
+    Args:
+        left: Left Arrow data type.
+        right: Right Arrow data type.
+
+    Returns:
+        Promoted Arrow data type.
+    """
     # decimal dominates ints/floats if present? Depends on your semantics.
     # Here: decimals keep exactness when mixing with ints; floats win when mixing float+anything non-decimal.
     if pa.types.is_decimal(left) and pa.types.is_decimal(right):
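Worked through, the scale/precision alignment means: raise both sides to the larger scale, then size precision to hold the larger integer-digit count. An illustrative reimplementation of that rule (not the package's code, which is truncated here):

import pyarrow as pa

def promote_decimals_sketch(left, right) -> pa.DataType:
    scale = max(left.scale, right.scale)
    # integer_digits = precision - scale must survive the scale bump on each side
    integer_digits = max(left.precision - left.scale, right.precision - right.scale)
    precision = integer_digits + scale
    return (pa.decimal128 if precision <= 38 else pa.decimal256)(precision, scale)

# decimal128(10, 2) vs decimal128(6, 4): 8 integer digits, scale 4 -> decimal128(12, 4)
print(promote_decimals_sketch(pa.decimal128(10, 2), pa.decimal128(6, 4)))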
@@ -409,6 +571,15 @@ def _promote_numeric(left: pa.DataType, right: pa.DataType) -> pa.DataType:
 
 
 def _merge_time_units(left_unit: str, right_unit: str) -> str:
+    """Return the finer-grained Arrow time unit of two units.
+
+    Args:
+        left_unit: Left time unit.
+        right_unit: Right time unit.
+
+    Returns:
+        Selected time unit.
+    """
     # choose finer resolution (higher order index)
     return left_unit if _INT_UNITS_ORDER[left_unit] >= _INT_UNITS_ORDER[right_unit] else right_unit
 
@@ -418,6 +589,16 @@ def merge_arrow_types(
     right: Union[pa.DataType, pa.TimestampType, pa.ListType, pa.MapType, pa.StructType],
     add_missing_columns: bool = True
 ) -> pa.DataType:
+    """Merge two Arrow types into a compatible supertype.
+
+    Args:
+        left: Left Arrow data type.
+        right: Right Arrow data type.
+        add_missing_columns: Whether to include missing struct fields.
+
+    Returns:
+        Merged Arrow data type.
+    """
     # null is identity
     if _is_null(left):
         return right

@@ -563,6 +744,16 @@ def merge_arrow_fields(
     right: pa.Field,
     add_missing_columns: bool = True
 ) -> pa.Field:
+    """Merge two Arrow fields into a compatible field.
+
+    Args:
+        left: Left Arrow field.
+        right: Right Arrow field.
+        add_missing_columns: Whether to include missing struct fields.
+
+    Returns:
+        Merged Arrow field.
+    """
     if left.name != right.name:
         raise TypeError(f"Cannot merge fields with different names: {left.name!r} vs {right.name!r}")
 
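Taken together with the comments above (null is identity, floats beat ints when no decimal is involved, time units merge to the finer resolution), a much-reduced sketch of the merge rules looks like this; the expected outputs are inferred from the diff's comments, not verified against the package:

import pyarrow as pa

_UNITS = {"s": 0, "ms": 1, "us": 2, "ns": 3}

def merge_types_sketch(left: pa.DataType, right: pa.DataType) -> pa.DataType:
    if pa.types.is_null(left):   # null is identity
        return right
    if pa.types.is_null(right):
        return left
    if pa.types.is_timestamp(left) and pa.types.is_timestamp(right):
        # keep the finer resolution, like _merge_time_units
        unit = left.unit if _UNITS[left.unit] >= _UNITS[right.unit] else right.unit
        return pa.timestamp(unit, tz=left.tz or right.tz)
    if pa.types.is_floating(left) or pa.types.is_floating(right):
        return pa.float64()      # floats win over ints
    if left.equals(right):
        return left
    raise TypeError(f"no common supertype sketched for {left} and {right}")

print(merge_types_sketch(pa.null(), pa.int32()))                  # int32
print(merge_types_sketch(pa.timestamp("s"), pa.timestamp("ms")))  # timestamp[ms]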
yggdrasil/types/python_defaults.py
CHANGED

@@ -1,3 +1,5 @@
+"""Default value helpers for Python and Arrow types."""
+
 import dataclasses
 import datetime
 import decimal

@@ -96,6 +98,14 @@ except ImportError:
     _POLARS_DEFAULTS = {}
 
 def _is_optional(hint) -> bool:
+    """Return True when the type hint is Optional.
+
+    Args:
+        hint: Type hint to inspect.
+
+    Returns:
+        True if Optional.
+    """
     origin = get_origin(hint)
 
     if origin in (Union, types.UnionType):

@@ -105,6 +115,14 @@ def _is_optional(hint) -> bool:
 
 
 def _default_for_collection(origin):
+    """Return default values for collection-like origins.
+
+    Args:
+        origin: Collection origin type.
+
+    Returns:
+        Default collection instance or None.
+    """
     if origin in (list, MutableSequence):
         return []
 

@@ -124,6 +142,14 @@ def _default_for_collection(origin):
 
 
 def _default_for_tuple_args(args):
+    """Return a default tuple based on element hints.
+
+    Args:
+        args: Tuple element type hints.
+
+    Returns:
+        Default tuple instance.
+    """
     if not args:
         return tuple()
 

@@ -134,6 +160,14 @@ def _default_for_tuple_args(args):
 
 
 def _default_for_dataclass(hint):
+    """Return a default instance for a dataclass type.
+
+    Args:
+        hint: Dataclass type.
+
+    Returns:
+        Dataclass instance with default values.
+    """
     kwargs = {}
 
     for field in dataclasses.fields(hint):

@@ -156,6 +190,15 @@ def default_arrow_scalar(
     dtype: Union[pa.DataType, pa.ListType, pa.MapType, pa.StructType, pa.FixedSizeListType],
     nullable: bool
 ):
+    """Return a default scalar for a given Arrow type.
+
+    Args:
+        dtype: Arrow data type.
+        nullable: Whether the scalar should be nullable.
+
+    Returns:
+        Arrow scalar default.
+    """
     if nullable:
         return pa.scalar(None, type=dtype)
 

@@ -208,6 +251,19 @@ def default_arrow_array(
     chunks: Optional[List[int]] = None,
     scalar_default: Optional[pa.Scalar] = None,
 ) -> Union[pa.Array, pa.ChunkedArray]:
+    """Return a default Arrow array or chunked array for a given type.
+
+    Args:
+        dtype: Arrow data type.
+        nullable: Whether values are nullable.
+        size: Number of elements.
+        memory_pool: Optional Arrow memory pool.
+        chunks: Optional chunk sizes.
+        scalar_default: Optional scalar default override.
+
+    Returns:
+        Arrow array or chunked array.
+    """
     if scalar_default is None:
         scalar_default = default_arrow_scalar(dtype=dtype, nullable=nullable)
 

@@ -240,6 +296,14 @@ def default_arrow_array(
 
 
 def default_python_scalar(hint: Any):
+    """Return a default Python value for the given type hint.
+
+    Args:
+        hint: Type hint to generate defaults for.
+
+    Returns:
+        Default Python value.
+    """
     if _is_optional(hint):
         return None
 

@@ -286,6 +350,15 @@ def default_scalar(
     ],
     nullable: Optional[bool] = None
 ):
+    """Return a default scalar value for Python or Arrow type hints.
+
+    Args:
+        hint: Python type or Arrow type/field.
+        nullable: Override nullability for Arrow types.
+
+    Returns:
+        Default scalar value.
+    """
     if isinstance(hint, pa.Field):
         nullable = hint.nullable if nullable is None else nullable
         return default_arrow_scalar(dtype=hint.type, nullable=nullable)
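The nullable split in default_arrow_scalar is the key detail: a nullable slot defaults to a typed null, while a non-nullable slot needs a concrete zero value. A minimal sketch for a few types (an assumed reimplementation, not the package's):

import pyarrow as pa

def default_scalar_sketch(dtype: pa.DataType, nullable: bool) -> pa.Scalar:
    if nullable:
        return pa.scalar(None, type=dtype)  # typed null
    zero_values = {pa.int64(): 0, pa.float64(): 0.0, pa.string(): ""}
    return pa.scalar(zero_values[dtype], type=dtype)

print(default_scalar_sketch(pa.int64(), nullable=True))    # null int64 scalar
print(default_scalar_sketch(pa.string(), nullable=False))  # empty string scalar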
yggdrasil/version.py
ADDED
@@ -0,0 +1 @@
+__version__ = "0.1.32"