ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +31 -34
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +148 -1
  15. yggdrasil/databricks/sql/types.py +49 -1
  16. yggdrasil/databricks/workspaces/__init__.py +4 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +344 -0
  18. yggdrasil/databricks/workspaces/io.py +1123 -0
  19. yggdrasil/databricks/workspaces/path.py +1415 -0
  20. yggdrasil/databricks/workspaces/path_kind.py +13 -0
  21. yggdrasil/databricks/workspaces/workspace.py +298 -154
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +131 -0
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.30.dist-info/RECORD +0 -56
  58. yggdrasil/databricks/workspaces/databricks_path.py +0 -784
  59. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  60. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  61. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  62. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/types/cast/spark_cast.py CHANGED
@@ -1,3 +1,5 @@
+"""Spark <-> Arrow casting helpers and converters."""
+
 from typing import Optional, Tuple, List

 import pyarrow as pa
@@ -57,10 +59,20 @@ if pyspark is not None:
     SparkStructField = T.StructField

     def spark_converter(*args, **kwargs):
+        """Return a register_converter wrapper when pyspark is available.
+
+        Args:
+            *args: Converter registration args.
+            **kwargs: Converter registration kwargs.
+
+        Returns:
+            Converter decorator.
+        """
         return register_converter(*args, **kwargs)

 else: # pyspark missing -> dummies + no-op decorator
     class _SparkDummy: # pragma: no cover
+        """Placeholder type for Spark symbols when pyspark is unavailable."""
         pass

     SparkDataFrame = _SparkDummy
@@ -70,7 +82,24 @@ else: # pyspark missing -> dummies + no-op decorator
     SparkStructField = _SparkDummy

     def spark_converter(*_args, **_kwargs): # pragma: no cover
+        """Return a no-op decorator when pyspark is unavailable.
+
+        Args:
+            *_args: Ignored positional args.
+            **_kwargs: Ignored keyword args.
+
+        Returns:
+            No-op decorator.
+        """
         def _decorator(func):
+            """Return the function unchanged.
+
+            Args:
+                func: Callable to return.
+
+            Returns:
+                Unchanged callable.
+            """
             return func

         return _decorator
@@ -227,6 +256,17 @@ def check_column_nullability(
     target_field: "T.StructField",
     mask: "pyspark.sql.Column"
 ) -> "pyspark.sql.Column":
+    """Fill nulls when the target field is non-nullable.
+
+    Args:
+        column: Spark column to adjust.
+        source_field: Source Spark field.
+        target_field: Target Spark field.
+        mask: Null mask column.
+
+    Returns:
+        Updated Spark column.
+    """
     source_nullable = True if source_field is None else source_field.nullable
     target_nullable = True if target_field is None else target_field.nullable

@@ -532,6 +572,15 @@ def spark_dataframe_to_spark_type(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return the Spark DataFrame schema as a Spark data type.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Spark DataType.
+    """
     return df.schema


@@ -540,6 +589,15 @@ def spark_dataframe_to_spark_field(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return a Spark StructField for the DataFrame schema.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Spark StructField.
+    """
     return SparkStructField(
         df.getAlias() or "root",
         df.schema,
@@ -552,6 +610,15 @@ def spark_dataframe_to_arrow_field(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return an Arrow field representation of the DataFrame schema.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Arrow field.
+    """
     return spark_field_to_arrow_field(
         spark_dataframe_to_spark_field(df, options),
         options
@@ -563,6 +630,15 @@ def spark_dataframe_to_arrow_schema(
     df: SparkDataFrame,
     options: Optional[CastOptions] = None,
 ) -> pa.DataType:
+    """Return an Arrow schema representation of the DataFrame.
+
+    Args:
+        df: Spark DataFrame.
+        options: Optional cast options.
+
+    Returns:
+        Arrow schema.
+    """
     return arrow_field_to_schema(
         spark_field_to_arrow_field(
             spark_dataframe_to_spark_field(df, options),
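These spark_cast.py hunks (and the spark_pandas_cast.py and spark_polars_cast.py hunks below) all rely on the same optional-dependency pattern: when the heavy dependency is missing, the converter decorator degrades to a no-op so the module stays importable. A minimal standalone sketch of that pattern, using generic names rather than yggdrasil's actual registry:

    # Sketch only: mirrors the fallback shown in the diff, not yggdrasil's real code.
    try:
        import pyspark  # optional dependency
    except ImportError:
        pyspark = None

    if pyspark is not None:
        def spark_converter(*args, **kwargs):
            # Here the real module delegates to register_converter(*args, **kwargs).
            def _decorator(func):
                return func
            return _decorator
    else:
        def spark_converter(*_args, **_kwargs):
            # No-op decorator: decorated functions remain usable without pyspark.
            def _decorator(func):
                return func
            return _decorator

    @spark_converter()
    def my_cast(df):
        return df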
yggdrasil/types/cast/spark_pandas_cast.py CHANGED
@@ -1,3 +1,5 @@
+"""Spark <-> pandas conversion helpers via Arrow."""
+
 from typing import Optional

 from .arrow_cast import CastOptions
@@ -34,18 +36,45 @@ if pyspark is not None and pandas is not None:
     PandasDataFrame = pandas.DataFrame

     def spark_pandas_converter(*args, **kwargs):
+        """Return a register_converter wrapper when dependencies are available.
+
+        Args:
+            *args: Converter registration args.
+            **kwargs: Converter registration kwargs.
+
+        Returns:
+            Converter decorator.
+        """
         return register_converter(*args, **kwargs)

 else:
     # Dummy stand-ins so decorators/annotations don't explode if one lib is absent
     class _Dummy: # pragma: no cover - only used when Spark or pandas not installed
+        """Placeholder type when Spark or pandas are unavailable."""
         pass

     SparkDataFrame = _Dummy
     PandasDataFrame = _Dummy

     def spark_pandas_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
+        """Return a no-op decorator when dependencies are missing.
+
+        Args:
+            *_args: Ignored positional args.
+            **_kwargs: Ignored keyword args.
+
+        Returns:
+            No-op decorator.
+        """
         def _decorator(func):
+            """Return the function unchanged.
+
+            Args:
+                func: Callable to return.
+
+            Returns:
+                Unchanged callable.
+            """
             return func

         return _decorator
yggdrasil/types/cast/spark_polars_cast.py CHANGED
@@ -1,3 +1,5 @@
+"""Spark <-> Polars conversion helpers via Arrow."""
+
 from typing import Optional

 import pyarrow as pa
@@ -23,10 +25,36 @@ __all__ = [
 # ---------------------------------------------------------------------------
 if pyspark is not None and polars is not None:
     def spark_polars_converter(*args, **kwargs):
+        """Return a register_converter wrapper when deps are available.
+
+        Args:
+            *args: Converter registration args.
+            **kwargs: Converter registration kwargs.
+
+        Returns:
+            Converter decorator.
+        """
         return register_converter(*args, **kwargs)
 else:
     def spark_polars_converter(*_args, **_kwargs): # pragma: no cover - no-op decorator
+        """Return a no-op decorator when deps are missing.
+
+        Args:
+            *_args: Ignored positional args.
+            **_kwargs: Ignored keyword args.
+
+        Returns:
+            No-op decorator.
+        """
         def _decorator(func):
+            """Return the function unchanged.
+
+            Args:
+                func: Callable to return.
+
+            Returns:
+                Unchanged callable.
+            """
             return func

         return _decorator
yggdrasil/types/libs.py CHANGED
@@ -1,3 +1,5 @@
+"""Re-export optional dependency helpers for types modules."""
+
 from ..libs import pandas, polars, pyspark, require_pandas, require_polars, require_pyspark

 __all__ = [
yggdrasil/types/python_arrow.py CHANGED
@@ -1,3 +1,5 @@
+"""Arrow type inference utilities from Python type hints."""
+
 import dataclasses
 import datetime
 import decimal
@@ -57,6 +59,14 @@ _INT_UNITS_ORDER = {"s": 0, "ms": 1, "us": 2, "ns": 3}


 def _is_optional(hint) -> bool:
+    """Return True when the hint includes None.
+
+    Args:
+        hint: Type hint to inspect.
+
+    Returns:
+        True if Optional.
+    """
     origin = get_origin(hint)

     if origin is Annotated:
@@ -69,6 +79,14 @@ def _is_optional(hint) -> bool:


 def _strip_optional(hint):
+    """Return the underlying hint without Optional[...].
+
+    Args:
+        hint: Type hint to inspect.
+
+    Returns:
+        Hint without Optional wrapper.
+    """
     origin = get_origin(hint)

     if origin is Annotated:
@@ -89,6 +107,15 @@ def _strip_optional(hint):


 def _field_name(hint, index: int | None) -> str:
+    """Derive a field name from a hint and optional index.
+
+    Args:
+        hint: Type hint to inspect.
+        index: Optional positional index.
+
+    Returns:
+        Field name string.
+    """
     name = getattr(hint, "__name__", None)

     if name:
@@ -101,6 +128,14 @@ def _field_name(hint, index: int | None) -> str:


 def _struct_from_dataclass(hint) -> pa.StructType:
+    """Build an Arrow struct type from a dataclass.
+
+    Args:
+        hint: Dataclass type.
+
+    Returns:
+        Arrow StructType.
+    """
     fields = []

     for field in dataclasses.fields(hint):
@@ -113,6 +148,15 @@ def _struct_from_dataclass(hint) -> pa.StructType:


 def _struct_from_tuple(args, names: list[str] | None = None) -> pa.StructType:
+    """Build an Arrow struct type from tuple hints.
+
+    Args:
+        args: Tuple element type hints.
+        names: Optional field names.
+
+    Returns:
+        Arrow StructType.
+    """
     if names is not None and len(names) != len(args):
         raise TypeError("Tuple metadata names length must match tuple elements")

@@ -125,6 +169,15 @@ def _struct_from_tuple(args, names: list[str] | None = None) -> pa.StructType:


 def _arrow_type_from_metadata(base_hint, metadata):
+    """Resolve an Arrow type from Annotated metadata when present.
+
+    Args:
+        base_hint: Base Python type hint.
+        metadata: Annotated metadata sequence.
+
+    Returns:
+        Arrow DataType or None.
+    """
     merged_metadata: dict[str, Any] = {}

     for item in metadata:
@@ -187,6 +240,14 @@


 def _arrow_type_from_hint(hint):
+    """Infer an Arrow data type from a Python type hint.
+
+    Args:
+        hint: Python type hint.
+
+    Returns:
+        Arrow DataType.
+    """
     if get_origin(hint) is Annotated:
         base_hint, *metadata = get_args(hint)
         metadata_type = _arrow_type_from_metadata(base_hint, metadata)
@@ -229,6 +290,16 @@


 def arrow_field_from_hint(hint, name: str | None = None, index: int | None = None) -> pa.Field:
+    """Build an Arrow field from a Python type hint.
+
+    Args:
+        hint: Python type hint.
+        name: Optional field name override.
+        index: Optional positional index.
+
+    Returns:
+        Arrow field.
+    """
     nullable = _is_optional(hint)
     base_hint = _strip_optional(hint) if nullable else hint
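The arrow_field_from_hint hunk above is the public entry point for this inference. A hypothetical usage sketch (the import path is inferred from the file layout in this diff, and the expected results restate the helpers documented above rather than anything verified here):

    from typing import Optional
    import dataclasses

    from yggdrasil.types.python_arrow import arrow_field_from_hint  # path assumed

    @dataclasses.dataclass
    class Point:
        x: float
        y: Optional[int]  # Optional -> nullable field, per _is_optional

    field = arrow_field_from_hint(Point, name="point")
    print(field.type)      # expected: a struct type built via _struct_from_dataclass
    print(field.nullable)  # expected: False, since Point itself is not Optional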
@@ -269,6 +340,15 @@ def is_arrow_type_binary_like(arrow_type: pa.DataType) -> bool:


 def _merge_metadata(left: Optional[Dict[bytes, bytes]], right: Optional[Dict[bytes, bytes]]) -> Optional[Dict[bytes, bytes]]:
+    """Merge Arrow field metadata with right-hand precedence.
+
+    Args:
+        left: Left metadata mapping.
+        right: Right metadata mapping.
+
+    Returns:
+        Merged metadata mapping or None.
+    """
     if not left and not right:
         return None
     out: Dict[bytes, bytes] = {}
@@ -281,31 +361,87 @@ def _merge_metadata(left: Optional[Dict[bytes, bytes]], right: Optional[Dict[bytes, bytes]]) -> Optional[Dict[bytes, bytes]]:


 def _is_null(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is null.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if null type.
+    """
     return pa.types.is_null(dt)


 def _is_integer(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is integer-like.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if integer type.
+    """
     return pa.types.is_integer(dt)


 def _is_signed_integer(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is signed integer.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if signed integer.
+    """
     return pa.types.is_signed_integer(dt)


 def _is_unsigned_integer(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is unsigned integer.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if unsigned integer.
+    """
     return pa.types.is_unsigned_integer(dt)


 def _is_floating(dt: pa.DataType) -> bool:
+    """Return True when the Arrow type is floating-point.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        True if floating type.
+    """
     return pa.types.is_floating(dt)


 def _int_bit_width(dt: pa.DataType) -> int:
+    """Return the bit width of an integer Arrow type.
+
+    Args:
+        dt: Arrow data type.
+
+    Returns:
+        Bit width.
+    """
     # int8/int16/int32/int64/uint8/...
     return dt.bit_width


 def _digits_for_uint_bits(bits: int) -> int:
+    """Return a safe decimal digit count for unsigned integer bits.
+
+    Args:
+        bits: Unsigned bit width.
+
+    Returns:
+        Decimal digit count.
+    """
     # max uint bits -> decimal digits upper bound:
     # uint64 max = 18446744073709551615 => 20 digits
     # 2**bits - 1 has ceil(bits*log10(2)) digits, use safe upper bound
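The digit bound quoted in that comment is easy to sanity-check; a quick standalone arithmetic check (not yggdrasil code):

    import math

    for bits in (8, 16, 32, 64):
        exact = len(str(2 ** bits - 1))          # digits of the max unsigned value
        bound = math.ceil(bits * math.log10(2))  # the bound from the comment
        print(bits, exact, bound)                # 8->3, 16->5, 32->10, 64->20 for both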
@@ -364,10 +500,27 @@ def _promote_int_types(left: pa.DataType, right: pa.DataType) -> pa.DataType:

 def _promote_decimal_types(left: pa.Decimal128Type | pa.Decimal256Type,
                            right: pa.Decimal128Type | pa.Decimal256Type) -> pa.DataType:
+    """Return a decimal type that can represent both inputs.
+
+    Args:
+        left: Left decimal type.
+        right: Right decimal type.
+
+    Returns:
+        Promoted decimal Arrow type.
+    """
     # Match scale, then set precision to fit both after scale alignment.
     scale = max(left.scale, right.scale)

     def adj_precision(d: pa.DataType) -> int:
+        """Adjust precision to account for scale differences.
+
+        Args:
+            d: Decimal Arrow type.
+
+        Returns:
+            Adjusted precision.
+        """
         # Increasing scale can require increasing precision to keep same integer digits.
         # integer_digits = precision - scale
         integer_digits = d.precision - d.scale
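A worked example of that scale/precision bookkeeping. The diff only shows scale = max(...) and integer_digits = precision - scale; the final step (preserve integer digits, then add the merged scale back) is an assumption about the rest of the helper, not confirmed by this hunk:

    # Illustrative arithmetic only; variable names are hypothetical.
    left_precision, left_scale = 10, 2    # e.g. decimal128(10, 2)
    right_precision, right_scale = 8, 4   # e.g. decimal128(8, 4)

    scale = max(left_scale, right_scale)                 # 4
    left_adj = (left_precision - left_scale) + scale     # 8 integer digits + 4 -> 12
    right_adj = (right_precision - right_scale) + scale  # 4 integer digits + 4 -> 8
    print(scale, max(left_adj, right_adj))               # promoted type ~ decimal(12, 4)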
@@ -382,6 +535,15 @@ def _promote_decimal_types(left: pa.Decimal128Type | pa.Decimal256Type,


 def _promote_numeric(left: pa.DataType, right: pa.DataType) -> pa.DataType:
+    """Promote numeric Arrow types to a common compatible type.
+
+    Args:
+        left: Left Arrow data type.
+        right: Right Arrow data type.
+
+    Returns:
+        Promoted Arrow data type.
+    """
     # decimal dominates ints/floats if present? Depends on your semantics.
     # Here: decimals keep exactness when mixing with ints; floats win when mixing float+anything non-decimal.
     if pa.types.is_decimal(left) and pa.types.is_decimal(right):
@@ -409,6 +571,15 @@ def _promote_numeric(left: pa.DataType, right: pa.DataType) -> pa.DataType:


 def _merge_time_units(left_unit: str, right_unit: str) -> str:
+    """Return the finer-grained Arrow time unit of two units.
+
+    Args:
+        left_unit: Left time unit.
+        right_unit: Right time unit.
+
+    Returns:
+        Selected time unit.
+    """
     # choose finer resolution (higher order index)
     return left_unit if _INT_UNITS_ORDER[left_unit] >= _INT_UNITS_ORDER[right_unit] else right_unit

@@ -418,6 +589,16 @@ def merge_arrow_types(
     right: Union[pa.DataType, pa.TimestampType, pa.ListType, pa.MapType, pa.StructType],
     add_missing_columns: bool = True
 ) -> pa.DataType:
+    """Merge two Arrow types into a compatible supertype.
+
+    Args:
+        left: Left Arrow data type.
+        right: Right Arrow data type.
+        add_missing_columns: Whether to include missing struct fields.
+
+    Returns:
+        Merged Arrow data type.
+    """
     # null is identity
     if _is_null(left):
         return right
@@ -563,6 +744,16 @@ def merge_arrow_fields(
     right: pa.Field,
     add_missing_columns: bool = True
 ) -> pa.Field:
+    """Merge two Arrow fields into a compatible field.
+
+    Args:
+        left: Left Arrow field.
+        right: Right Arrow field.
+        add_missing_columns: Whether to include missing struct fields.
+
+    Returns:
+        Merged Arrow field.
+    """
     if left.name != right.name:
         raise TypeError(f"Cannot merge fields with different names: {left.name!r} vs {right.name!r}")
yggdrasil/types/python_defaults.py CHANGED
@@ -1,3 +1,5 @@
+"""Default value helpers for Python and Arrow types."""
+
 import dataclasses
 import datetime
 import decimal
@@ -96,6 +98,14 @@ except ImportError:
     _POLARS_DEFAULTS = {}

 def _is_optional(hint) -> bool:
+    """Return True when the type hint is Optional.
+
+    Args:
+        hint: Type hint to inspect.
+
+    Returns:
+        True if Optional.
+    """
     origin = get_origin(hint)

     if origin in (Union, types.UnionType):
@@ -105,6 +115,14 @@ def _is_optional(hint) -> bool:


 def _default_for_collection(origin):
+    """Return default values for collection-like origins.
+
+    Args:
+        origin: Collection origin type.
+
+    Returns:
+        Default collection instance or None.
+    """
     if origin in (list, MutableSequence):
         return []

@@ -124,6 +142,14 @@


 def _default_for_tuple_args(args):
+    """Return a default tuple based on element hints.
+
+    Args:
+        args: Tuple element type hints.
+
+    Returns:
+        Default tuple instance.
+    """
     if not args:
         return tuple()

@@ -134,6 +160,14 @@ def _default_for_tuple_args(args):


 def _default_for_dataclass(hint):
+    """Return a default instance for a dataclass type.
+
+    Args:
+        hint: Dataclass type.
+
+    Returns:
+        Dataclass instance with default values.
+    """
     kwargs = {}

     for field in dataclasses.fields(hint):
@@ -156,6 +190,15 @@ def default_arrow_scalar(
     dtype: Union[pa.DataType, pa.ListType, pa.MapType, pa.StructType, pa.FixedSizeListType],
     nullable: bool
 ):
+    """Return a default scalar for a given Arrow type.
+
+    Args:
+        dtype: Arrow data type.
+        nullable: Whether the scalar should be nullable.
+
+    Returns:
+        Arrow scalar default.
+    """
     if nullable:
         return pa.scalar(None, type=dtype)

@@ -208,6 +251,19 @@ def default_arrow_array(
     chunks: Optional[List[int]] = None,
     scalar_default: Optional[pa.Scalar] = None,
 ) -> Union[pa.Array, pa.ChunkedArray]:
+    """Return a default Arrow array or chunked array for a given type.
+
+    Args:
+        dtype: Arrow data type.
+        nullable: Whether values are nullable.
+        size: Number of elements.
+        memory_pool: Optional Arrow memory pool.
+        chunks: Optional chunk sizes.
+        scalar_default: Optional scalar default override.
+
+    Returns:
+        Arrow array or chunked array.
+    """
     if scalar_default is None:
         scalar_default = default_arrow_scalar(dtype=dtype, nullable=nullable)

@@ -240,6 +296,14 @@


 def default_python_scalar(hint: Any):
+    """Return a default Python value for the given type hint.
+
+    Args:
+        hint: Type hint to generate defaults for.
+
+    Returns:
+        Default Python value.
+    """
     if _is_optional(hint):
         return None

@@ -286,6 +350,15 @@ def default_scalar(
     ],
     nullable: Optional[bool] = None
 ):
+    """Return a default scalar value for Python or Arrow type hints.
+
+    Args:
+        hint: Python type or Arrow type/field.
+        nullable: Override nullability for Arrow types.
+
+    Returns:
+        Default scalar value.
+    """
     if isinstance(hint, pa.Field):
         nullable = hint.nullable if nullable is None else nullable
         return default_arrow_scalar(dtype=hint.type, nullable=nullable)
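A hypothetical usage sketch for the defaults helpers; the import path mirrors the file layout in this diff and the keyword names come from the docstrings above, so treat both as assumptions:

    import pyarrow as pa
    from yggdrasil.types.python_defaults import default_arrow_scalar, default_arrow_array  # path assumed

    print(default_arrow_scalar(pa.int64(), nullable=True))  # pa.scalar(None, type=int64), per the hunk
    arr = default_arrow_array(pa.string(), nullable=False, size=3)
    print(arr)  # expected: a 3-element array of non-null string defaults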
yggdrasil/version.py ADDED
@@ -0,0 +1 @@
+__version__ = "0.1.32"