ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +31 -34
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +148 -1
  15. yggdrasil/databricks/sql/types.py +49 -1
  16. yggdrasil/databricks/workspaces/__init__.py +4 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +344 -0
  18. yggdrasil/databricks/workspaces/io.py +1123 -0
  19. yggdrasil/databricks/workspaces/path.py +1415 -0
  20. yggdrasil/databricks/workspaces/path_kind.py +13 -0
  21. yggdrasil/databricks/workspaces/workspace.py +298 -154
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +131 -0
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.30.dist-info/RECORD +0 -56
  58. yggdrasil/databricks/workspaces/databricks_path.py +0 -784
  59. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  60. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  61. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  62. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/dataclasses/dataclass.py
@@ -1,3 +1,5 @@
+"""Dataclass helpers that integrate with Arrow schemas and safe casting."""
+
 import dataclasses
 from inspect import isclass
 from typing import Any, Iterable, Mapping, Tuple
@@ -18,6 +20,7 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:
 
     Args:
         cls_or_instance: The class or instance to check.
+
     Returns:
         True if the class or instance
         is a yggdrasil dataclass, False otherwise.
@@ -26,6 +29,14 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:
 
 
 def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
+    """Return a cached Arrow Field describing the dataclass type.
+
+    Args:
+        cls_or_instance: Dataclass class or instance.
+
+    Returns:
+        Arrow field describing the dataclass schema.
+    """
     if is_yggdataclass(cls_or_instance):
         return cls_or_instance.__arrow_field__()
 
@@ -58,7 +69,7 @@ def yggdataclass(
     kw_only=False, slots=False,
     weakref_slot=False
 ):
-    """Add dunder methods based on the fields defined in the class.
+    """Decorate a class with dataclass behavior plus Arrow helpers.
 
     Examines PEP 526 __annotations__ to determine fields.
 
@@ -73,7 +84,24 @@ def yggdataclass(
     """
 
     def wrap(c):
+        """Wrap a class with yggdrasil dataclass enhancements.
+
+        Args:
+            c: Class to decorate.
+
+        Returns:
+            Decorated dataclass type.
+        """
+
         def _init_public_fields(cls):
+            """Return init-enabled, public dataclass fields.
+
+            Args:
+                cls: Dataclass type.
+
+            Returns:
+                List of dataclasses.Field objects.
+            """
             return [
                 field
                 for field in dataclasses.fields(cls)
@@ -83,6 +111,11 @@ def yggdataclass(
         if not hasattr(c, "default_instance"):
             @classmethod
             def default_instance(cls):
+                """Return a default instance built from type defaults.
+
+                Returns:
+                    Default instance of the dataclass.
+                """
                 from yggdrasil.types import default_scalar
 
                 if not hasattr(cls, "__default_instance__"):
@@ -135,6 +168,14 @@ def yggdataclass(
         if not hasattr(c, "__arrow_field__"):
            @classmethod
            def __arrow_field__(cls, name: str | None = None):
+                """Return an Arrow field representing the dataclass schema.
+
+                Args:
+                    name: Optional override for the field name.
+
+                Returns:
+                    Arrow field describing the dataclass schema.
+                """
                 from yggdrasil.types.python_arrow import arrow_field_from_hint
 
                 return arrow_field_from_hint(cls, name=name)
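The hunks above (yggdrasil/dataclasses/dataclass.py) document the yggdataclass decorator and its Arrow helpers. A minimal usage sketch, not part of the diff: it assumes the helpers are importable from yggdrasil.dataclasses.dataclass and that the decorator accepts a bare class the way dataclasses.dataclass does.

    # Hedged sketch: import path and bare-decorator form are inferred from this
    # diff, not confirmed package documentation.
    from yggdrasil.dataclasses.dataclass import (
        get_dataclass_arrow_field,
        is_yggdataclass,
        yggdataclass,
    )

    @yggdataclass
    class Reading:
        sensor_id: str = ""
        value: float = 0.0

    assert is_yggdataclass(Reading)
    arrow_field = get_dataclass_arrow_field(Reading)  # pyarrow.Field for the class schema
    default = Reading.default_instance()              # instance built from type defaults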
yggdrasil/libs/__init__.py
@@ -1,3 +1,5 @@
+"""Helper utilities for optional dependency integrations."""
+
 from .sparklib import *
 from .polarslib import *
 from .pandaslib import *
yggdrasil/libs/databrickslib.py
@@ -1,3 +1,5 @@
+"""Optional Databricks SDK dependency helpers."""
+
 try:
     import databricks
     import databricks.sdk # type: ignore
@@ -6,7 +8,9 @@ try:
     databricks_sdk = databricks.sdk
 except ImportError:
     class _DatabricksDummy:
+        """Placeholder object that raises if Databricks SDK is required."""
         def __getattr__(self, item):
+            """Raise an error when accessing missing Databricks SDK attributes."""
             require_databricks_sdk()
 
     databricks = _DatabricksDummy
@@ -14,6 +18,11 @@ except ImportError:
 
 
 def require_databricks_sdk():
+    """Ensure the Databricks SDK is available before use.
+
+    Returns:
+        None.
+    """
     if databricks_sdk is None:
         raise ImportError(
             "databricks_sdk is required to use this function. "
yggdrasil/libs/extensions/__init__.py
@@ -1,2 +1,4 @@
+"""Extensions for Spark and Polars helpers."""
+
 from .spark_extensions import *
 from .polars_extensions import *
yggdrasil/libs/extensions/polars_extensions.py
@@ -1,3 +1,5 @@
+"""Polars DataFrame extension helpers for joins and resampling."""
+
 from __future__ import annotations
 
 import datetime
@@ -39,6 +41,14 @@ def join_coalesced(
 
 
 def _normalize_group_by(group_by: str | Sequence[str] | None) -> list[str] | None:
+    """Normalize group_by inputs into a list or None.
+
+    Args:
+        group_by: Grouping column or columns.
+
+    Returns:
+        List of column names or None.
+    """
     if group_by is None:
         return None
     if isinstance(group_by, str):
@@ -57,6 +67,15 @@ def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str,
 
 
 def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
+    """Build a Polars expression from an aggregation spec.
+
+    Args:
+        col: Column name to aggregate.
+        agg: Aggregation spec (expr, callable, or string).
+
+    Returns:
+        Polars expression.
+    """
     base = pl.col(col)
 
     if isinstance(agg, pl.Expr):
@@ -80,6 +99,14 @@ def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
 
 
 def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
+    """Normalize aggregation specs into a list of Polars expressions.
+
+    Args:
+        agg: Mapping or sequence of aggregation specs.
+
+    Returns:
+        List of Polars expressions.
+    """
     if isinstance(agg, Mapping):
         return [_expr_from_agg(col, spec) for col, spec in agg.items()]
 
@@ -91,11 +118,27 @@ def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
 
 
 def _is_datetime(dtype: object) -> bool:
+    """Return True when the dtype is a Polars datetime.
+
+    Args:
+        dtype: Polars dtype to inspect.
+
+    Returns:
+        True if dtype is Polars Datetime.
+    """
     # Datetime-only inference (per requirement), version-safe.
     return isinstance(dtype, pl.Datetime)
 
 
 def _infer_time_col(df: "pl.DataFrame") -> str:
+    """Infer the first datetime-like column name from a DataFrame.
+
+    Args:
+        df: Polars DataFrame to inspect.
+
+    Returns:
+        Column name of the first datetime field.
+    """
     # Find first Datetime column in schema order; ignore Date columns.
     for name, dtype in df.schema.items():
         if _is_datetime(dtype):
@@ -106,6 +149,15 @@ def _infer_time_col(df: "pl.DataFrame") -> str:
 
 
 def _ensure_datetime_like(df: "pl.DataFrame", time_col: str) -> "pl.DataFrame":
+    """Ensure a time column is cast to datetime for resampling.
+
+    Args:
+        df: Polars DataFrame.
+        time_col: Column name to validate.
+
+    Returns:
+        DataFrame with time column cast to datetime if needed.
+    """
     dtype = df.schema.get(time_col)
     if dtype is None:
         raise KeyError(f"resample: time_col '{time_col}' not found in DataFrame columns.")
@@ -151,6 +203,14 @@ def _timedelta_to_polars_duration(td: datetime.timedelta) -> str:
 
 
 def _normalize_duration(v: str | datetime.timedelta | None) -> str | None:
+    """Normalize duration inputs to a Polars duration string.
+
+    Args:
+        v: Duration string, timedelta, or None.
+
+    Returns:
+        Normalized duration string or None.
+    """
     if v is None:
         return None
     if isinstance(v, str):
@@ -168,6 +228,18 @@ def _upsample_single(
     offset: str | datetime.timedelta | None,
     keep_group_order: bool,
 ) -> "pl.DataFrame":
+    """Upsample a single DataFrame with normalized duration arguments.
+
+    Args:
+        df: Polars DataFrame to upsample.
+        time_col: Name of the time column.
+        every: Sampling interval.
+        offset: Optional offset interval.
+        keep_group_order: Preserve input order when grouping.
+
+    Returns:
+        Upsampled Polars DataFrame.
+    """
     df = df.sort(time_col)
 
     every_n = _normalize_duration(every)
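The docstrings above (yggdrasil/libs/extensions/polars_extensions.py) describe _expr_from_agg as accepting an aggregation spec that can be a ready-made pl.Expr, a callable, or a string. A short sketch of that normalization idea in plain Polars, not part of the diff; the string-to-method resolution is an assumption about the branch the hunk does not show.

    import polars as pl

    def expr_from_agg(col: str, agg) -> pl.Expr:
        # Pass expressions through, apply callables to the base column,
        # and resolve strings to the matching aggregation method.
        base = pl.col(col)
        if isinstance(agg, pl.Expr):
            return agg
        if callable(agg):
            return agg(base)
        return getattr(base, agg)()  # e.g. "mean" -> pl.col(col).mean()

    df = pl.DataFrame({"price": [1.0, 2.0], "volume": [10, 20], "quality": [3, 5]})
    exprs = [
        expr_from_agg("price", "mean"),
        expr_from_agg("volume", pl.col("volume").sum()),
        expr_from_agg("quality", lambda c: c.max()),
    ]
    print(df.select(exprs))  # one aggregated value per column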
yggdrasil/libs/extensions/spark_extensions.py
@@ -1,3 +1,5 @@
+"""Spark DataFrame extension helpers for aliases and resampling."""
+
 import datetime
 import inspect
 import re
@@ -30,6 +32,15 @@ _COL_RE = re.compile(r"Column<\s*['\"]?`?(.+?)`?['\"]?\s*>")
 
 
 def _require_pyspark(fn_name: str) -> None:
+    """Raise when PySpark is unavailable for a requested helper."""
+    """Raise when PySpark is unavailable for a requested helper.
+
+    Args:
+        fn_name: Name of the calling function.
+
+    Returns:
+        None.
+    """
     if pyspark is None or F is None or T is None:
         raise RuntimeError(
             f"{fn_name} requires PySpark to be available. "
@@ -41,6 +52,15 @@ def getAliases(
     obj: Union[SparkDataFrame, SparkColumn, str, Iterable[Union[SparkDataFrame, SparkColumn, str]]],
     full: bool = True,
 ) -> list[str]:
+    """Return aliases for Spark columns/dataframes or collections.
+
+    Args:
+        obj: Spark DataFrame/Column, string, or iterable of these.
+        full: Whether to return full qualified names.
+
+    Returns:
+        List of alias strings.
+    """
     if obj is None:
         return []
 
@@ -92,6 +112,16 @@ def latest(
     partitionBy: List[Union[str, SparkColumn]],
     orderBy: List[Union[str, SparkColumn]],
 ) -> SparkDataFrame:
+    """Return the latest rows per partition based on ordering.
+
+    Args:
+        df: Spark DataFrame.
+        partitionBy: Columns to partition by.
+        orderBy: Columns to order by.
+
+    Returns:
+        Spark DataFrame with latest rows per partition.
+    """
     _require_pyspark("latest")
 
     partition_col_names = getAliases(partitionBy)
@@ -123,12 +153,30 @@ def _infer_time_col_spark(df: "pyspark.sql.DataFrame") -> str:
 
 
 def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Filter kwargs to only those accepted by the callable.
+
+    Args:
+        fn: Callable to inspect.
+        kwargs: Candidate keyword arguments.
+
+    Returns:
+        Filtered keyword arguments.
+    """
     sig = inspect.signature(fn) # type: ignore[arg-type]
     allowed = set(sig.parameters.keys())
     return {k: v for k, v in kwargs.items() if (k in allowed and v is not None)}
 
 
 def _append_drop_col_to_spark_schema(schema: "T.StructType", drop_col: str) -> "T.StructType":
+    """Ensure the drop column exists in the Spark schema.
+
+    Args:
+        schema: Spark schema to augment.
+        drop_col: Column name to add if missing.
+
+    Returns:
+        Updated Spark schema.
+    """
     _require_pyspark("_append_drop_col_to_spark_schema")
     if drop_col in schema.fieldNames():
         return schema
@@ -169,6 +217,14 @@ def upsample(
     spark_schema = arrow_field_to_spark_field(options.target_field)
 
     def within_group(tb: pa.Table) -> pa.Table:
+        """Apply upsample logic to a grouped Arrow table.
+
+        Args:
+            tb: Arrow table for a grouped partition.
+
+        Returns:
+            Arrow table with upsampled data.
+        """
         res = (
             arrow_table_to_polars_dataframe(tb, options)
             .sort(time_col_name)
@@ -277,6 +333,14 @@ def resample(
     out_options = CastOptions.check_arg(out_arrow_field)
 
     def within_group(tb: pa.Table) -> pa.Table:
+        """Apply resample logic to a grouped Arrow table.
+
+        Args:
+            tb: Arrow table for a grouped partition.
+
+        Returns:
+            Arrow table with resampled data.
+        """
         from .polars_extensions import resample
 
         pdf = arrow_table_to_polars_dataframe(tb, in_options)
@@ -329,6 +393,18 @@ def checkJoin(
     *args,
     **kwargs,
 ):
+    """Join two DataFrames with schema-aware column casting.
+
+    Args:
+        df: Left Spark DataFrame.
+        other: Right Spark DataFrame.
+        on: Join keys or mapping.
+        *args: Positional args passed to join.
+        **kwargs: Keyword args passed to join.
+
+    Returns:
+        Joined Spark DataFrame.
+    """
     _require_pyspark("checkJoin")
 
     other = convert(other, SparkDataFrame)
@@ -371,12 +447,32 @@ def checkMapInArrow(
     *args,
     **kwargs,
 ):
+    """Wrap mapInArrow to enforce output schema conversion.
+
+    Args:
+        df: Spark DataFrame.
+        func: Generator function yielding RecordBatches.
+        schema: Output schema (Spark StructType or DDL string).
+        *args: Positional args passed to mapInArrow.
+        **kwargs: Keyword args passed to mapInArrow.
+
+    Returns:
+        Spark DataFrame with enforced schema.
+    """
     _require_pyspark("mapInArrow")
 
     spark_schema = convert(schema, T.StructType)
     arrow_schema = convert(schema, pa.Field)
 
     def patched(batches: Iterable[pa.RecordBatch]):
+        """Convert batches yielded by user function to the target schema.
+
+        Args:
+            batches: Input RecordBatch iterable.
+
+        Yields:
+            RecordBatch instances conforming to the output schema.
+        """
        for src in func(batches):
             yield convert(src, pa.RecordBatch, arrow_schema)
 
@@ -395,6 +491,18 @@ def checkMapInPandas(
     *args,
     **kwargs,
 ):
+    """Wrap mapInPandas to enforce output schema conversion.
+
+    Args:
+        df: Spark DataFrame.
+        func: Generator function yielding pandas DataFrames.
+        schema: Output schema (Spark StructType or DDL string).
+        *args: Positional args passed to mapInPandas.
+        **kwargs: Keyword args passed to mapInPandas.
+
+    Returns:
+        Spark DataFrame with enforced schema.
+    """
     _require_pyspark("mapInPandas")
 
     import pandas as _pd # local import so we don't shadow the ..pandas module
@@ -403,6 +511,14 @@ def checkMapInPandas(
     arrow_schema = convert(schema, pa.Field)
 
     def patched(batches: Iterable[_pd.DataFrame]):
+        """Convert pandas batches yielded by user function to the target schema.
+
+        Args:
+            batches: Input pandas DataFrame iterable.
+
+        Yields:
+            pandas DataFrames conforming to the output schema.
+        """
         for src in func(batches):
             yield convert(src, _pd.DataFrame, arrow_schema)
 
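The checkMapInArrow/checkMapInPandas docstrings above (yggdrasil/libs/extensions/spark_extensions.py) describe wrapping the user function so every yielded batch is converted to the declared output schema. A hedged sketch of that wrapping pattern in plain PySpark and PyArrow, not part of the diff and not using yggdrasil's convert() machinery.

    from typing import Callable, Iterable, Iterator

    import pyarrow as pa
    import pyspark.sql.types as T
    from pyspark.sql import DataFrame

    def map_in_arrow_checked(
        df: DataFrame,
        func: Callable[[Iterable[pa.RecordBatch]], Iterator[pa.RecordBatch]],
        spark_schema: T.StructType,
        arrow_schema: pa.Schema,
    ) -> DataFrame:
        def patched(batches: Iterable[pa.RecordBatch]) -> Iterator[pa.RecordBatch]:
            for src in func(batches):
                # Cast each yielded batch to the declared Arrow schema before
                # handing it back to Spark.
                yield from pa.Table.from_batches([src]).cast(arrow_schema).to_batches()

        return df.mapInArrow(patched, spark_schema)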
yggdrasil/libs/pandaslib.py
@@ -1,3 +1,5 @@
+"""Optional pandas dependency helpers."""
+
 try:
     import pandas # type: ignore
     pandas = pandas
@@ -6,6 +8,11 @@ except ImportError:
 
 
 def require_pandas():
+    """Ensure pandas is available before using pandas helpers.
+
+    Returns:
+        None.
+    """
     if pandas is None:
         raise ImportError(
             "pandas is required to use this function. "
yggdrasil/libs/polarslib.py
@@ -1,3 +1,5 @@
+"""Optional Polars dependency helpers."""
+
 try:
     import polars # type: ignore
 
@@ -13,6 +15,11 @@ __all__ = [
 
 
 def require_polars():
+    """Ensure polars is available before using polars helpers.
+
+    Returns:
+        None.
+    """
     if polars is None:
         raise ImportError(
             "polars is required to use this function. "
yggdrasil/libs/sparklib.py
@@ -1,3 +1,5 @@
+"""Optional Spark dependency helpers and Arrow/Spark type conversions."""
+
 from typing import Any
 
 import pyarrow as pa
@@ -51,18 +53,23 @@ except ImportError: # pragma: no cover - Spark not available
     pyspark = None
 
     class SparkSession:
+        """Fallback SparkSession placeholder when pyspark is unavailable."""
 
         @classmethod
         def getActiveSession(cls):
+            """Return None to indicate no active session is available."""
             return None
 
     class SparkDataFrame:
+        """Fallback DataFrame placeholder when pyspark is unavailable."""
         pass
 
     class SparkColumn:
+        """Fallback Column placeholder when pyspark is unavailable."""
         pass
 
     class SparkDataType:
+        """Fallback DataType placeholder when pyspark is unavailable."""
         pass
 
     ARROW_TO_SPARK = {}
@@ -91,6 +98,12 @@ __all__ = [
 def require_pyspark(active_session: bool = False):
     """
     Optionally enforce that pyspark (and an active SparkSession) exists.
+
+    Args:
+        active_session: Require an active SparkSession if True.
+
+    Returns:
+        None.
     """
     if pyspark is None:
         raise ImportError(
@@ -116,6 +129,13 @@ def arrow_type_to_spark_type(
 ) -> "T.DataType":
     """
     Convert a pyarrow.DataType to a pyspark.sql.types.DataType.
+
+    Args:
+        arrow_type: Arrow data type to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Spark SQL data type.
     """
     require_pyspark()
 
@@ -191,6 +211,13 @@ def arrow_field_to_spark_field(
 ) -> "T.StructField":
     """
     Convert a pyarrow.Field to a pyspark StructField.
+
+    Args:
+        field: Arrow field to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Spark StructField representation.
     """
     spark_type = arrow_type_to_spark_type(field.type, cast_options)
 
@@ -208,6 +235,13 @@ def spark_type_to_arrow_type(
 ) -> pa.DataType:
     """
     Convert a pyspark.sql.types.DataType to a pyarrow.DataType.
+
+    Args:
+        spark_type: Spark SQL data type to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Arrow data type.
     """
     require_pyspark()
     from pyspark.sql.types import (
@@ -287,6 +321,13 @@ def spark_field_to_arrow_field(
 ) -> pa.Field:
     """
     Convert a pyspark StructField to a pyarrow.Field.
+
+    Args:
+        field: Spark StructField to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Arrow field.
     """
     arrow_type = spark_type_to_arrow_type(field.dataType, cast_options)
 
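The docstrings above (yggdrasil/libs/sparklib.py) cover the Arrow/Spark type bridge. A hedged round-trip sketch, not part of the diff: it assumes the converters are importable from yggdrasil.libs.sparklib and that cast_options can be omitted, which the "Optional casting options" wording suggests but the hunks do not confirm.

    import pyarrow as pa

    from yggdrasil.libs.sparklib import (
        arrow_field_to_spark_field,
        require_pyspark,
        spark_field_to_arrow_field,
    )

    require_pyspark()  # raises ImportError when pyspark is not installed

    arrow_field = pa.field("event_ts", pa.timestamp("us"), nullable=False)
    spark_field = arrow_field_to_spark_field(arrow_field)    # -> T.StructField
    round_trip = spark_field_to_arrow_field(spark_field)     # -> pa.Field again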
yggdrasil/pyutils/__init__.py
@@ -1,4 +1,8 @@
+"""Python utility helpers for retries, parallelism, and environment management."""
+
 from .retry import retry
 from .parallel import parallelize
 from .python_env import PythonEnv
 from .callable_serde import CallableSerde
+
+__all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]