ygg 0.1.31__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +29 -4
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +147 -0
  15. yggdrasil/databricks/sql/types.py +33 -1
  16. yggdrasil/databricks/workspaces/__init__.py +2 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +183 -0
  18. yggdrasil/databricks/workspaces/io.py +387 -9
  19. yggdrasil/databricks/workspaces/path.py +297 -2
  20. yggdrasil/databricks/workspaces/path_kind.py +3 -0
  21. yggdrasil/databricks/workspaces/workspace.py +202 -5
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +123 -1
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.31.dist-info/RECORD +0 -59
  58. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  59. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  60. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  61. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/libs/extensions/spark_extensions.py

@@ -1,3 +1,5 @@
+ """Spark DataFrame extension helpers for aliases and resampling."""
+
  import datetime
  import inspect
  import re
@@ -30,6 +32,15 @@ _COL_RE = re.compile(r"Column<\s*['\"]?`?(.+?)`?['\"]?\s*>")


  def _require_pyspark(fn_name: str) -> None:
+ """Raise when PySpark is unavailable for a requested helper."""
+ """Raise when PySpark is unavailable for a requested helper.
+
+ Args:
+ fn_name: Name of the calling function.
+
+ Returns:
+ None.
+ """
  if pyspark is None or F is None or T is None:
  raise RuntimeError(
  f"{fn_name} requires PySpark to be available. "
@@ -41,6 +52,15 @@ def getAliases(
  obj: Union[SparkDataFrame, SparkColumn, str, Iterable[Union[SparkDataFrame, SparkColumn, str]]],
  full: bool = True,
  ) -> list[str]:
+ """Return aliases for Spark columns/dataframes or collections.
+
+ Args:
+ obj: Spark DataFrame/Column, string, or iterable of these.
+ full: Whether to return full qualified names.
+
+ Returns:
+ List of alias strings.
+ """
  if obj is None:
  return []

@@ -92,6 +112,16 @@ def latest(
  partitionBy: List[Union[str, SparkColumn]],
  orderBy: List[Union[str, SparkColumn]],
  ) -> SparkDataFrame:
+ """Return the latest rows per partition based on ordering.
+
+ Args:
+ df: Spark DataFrame.
+ partitionBy: Columns to partition by.
+ orderBy: Columns to order by.
+
+ Returns:
+ Spark DataFrame with latest rows per partition.
+ """
  _require_pyspark("latest")

  partition_col_names = getAliases(partitionBy)
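
The two helpers documented above are the public entry points of this hunk. A minimal usage sketch, assuming an active SparkSession and the signatures shown in the diff; the sample data and column names are invented:

```python
from pyspark.sql import SparkSession, functions as F
from yggdrasil.libs.extensions.spark_extensions import getAliases, latest

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("a", 1, "2024-01-01"), ("a", 2, "2024-02-01"), ("b", 1, "2024-01-15")],
    ["id", "version", "updated_at"],
)

# getAliases accepts columns, strings, or iterables of either and returns the
# alias names, so ["key", "updated_at"] would be the expected result here.
getAliases([F.col("id").alias("key"), "updated_at"])

# latest keeps the most recent row per partition according to the ordering
# columns, i.e. one row per id ordered by updated_at.
latest(df, partitionBy=["id"], orderBy=["updated_at"])
```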
@@ -123,12 +153,30 @@ def _infer_time_col_spark(df: "pyspark.sql.DataFrame") -> str:


  def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
+ """Filter kwargs to only those accepted by the callable.
+
+ Args:
+ fn: Callable to inspect.
+ kwargs: Candidate keyword arguments.
+
+ Returns:
+ Filtered keyword arguments.
+ """
  sig = inspect.signature(fn) # type: ignore[arg-type]
  allowed = set(sig.parameters.keys())
  return {k: v for k, v in kwargs.items() if (k in allowed and v is not None)}


  def _append_drop_col_to_spark_schema(schema: "T.StructType", drop_col: str) -> "T.StructType":
+ """Ensure the drop column exists in the Spark schema.
+
+ Args:
+ schema: Spark schema to augment.
+ drop_col: Column name to add if missing.
+
+ Returns:
+ Updated Spark schema.
+ """
  _require_pyspark("_append_drop_col_to_spark_schema")
  if drop_col in schema.fieldNames():
  return schema
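
`_filter_kwargs_for_callable` is a small pattern worth spelling out: inspect a callable's signature and drop any keyword argument it would not accept (or whose value is None). A standalone sketch of the same idea using only the standard library:

```python
import inspect
from typing import Any


def filter_kwargs(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
    # Keep only keyword arguments that appear in fn's signature and are not None,
    # mirroring the private helper added in this diff.
    allowed = set(inspect.signature(fn).parameters)
    return {k: v for k, v in kwargs.items() if k in allowed and v is not None}


def resample(df, every: str, closed: str = "left"):
    ...


filter_kwargs(resample, {"every": "1h", "closed": None, "unknown": 1})
# -> {"every": "1h"}
```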
@@ -169,6 +217,14 @@ def upsample(
  spark_schema = arrow_field_to_spark_field(options.target_field)

  def within_group(tb: pa.Table) -> pa.Table:
+ """Apply upsample logic to a grouped Arrow table.
+
+ Args:
+ tb: Arrow table for a grouped partition.
+
+ Returns:
+ Arrow table with upsampled data.
+ """
  res = (
  arrow_table_to_polars_dataframe(tb, options)
  .sort(time_col_name)
@@ -277,6 +333,14 @@ def resample(
  out_options = CastOptions.check_arg(out_arrow_field)

  def within_group(tb: pa.Table) -> pa.Table:
+ """Apply resample logic to a grouped Arrow table.
+
+ Args:
+ tb: Arrow table for a grouped partition.
+
+ Returns:
+ Arrow table with resampled data.
+ """
  from .polars_extensions import resample

  pdf = arrow_table_to_polars_dataframe(tb, in_options)
@@ -329,6 +393,18 @@ def checkJoin(
  *args,
  **kwargs,
  ):
+ """Join two DataFrames with schema-aware column casting.
+
+ Args:
+ df: Left Spark DataFrame.
+ other: Right Spark DataFrame.
+ on: Join keys or mapping.
+ *args: Positional args passed to join.
+ **kwargs: Keyword args passed to join.
+
+ Returns:
+ Joined Spark DataFrame.
+ """
  _require_pyspark("checkJoin")

  other = convert(other, SparkDataFrame)
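
A hedged usage sketch for `checkJoin`. The diff does not show how the `on` mapping is interpreted, so the mapping form below (left column name to right column name) is an assumption, and both DataFrames are placeholders:

```python
from yggdrasil.libs.extensions.spark_extensions import checkJoin

# orders.customer_id is a string while customers.id is a bigint; per the docstring
# the helper casts the join columns to compatible types before delegating to join().
joined = checkJoin(orders, customers, on={"customer_id": "id"}, how="left")
```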
@@ -371,12 +447,32 @@ def checkMapInArrow(
  *args,
  **kwargs,
  ):
+ """Wrap mapInArrow to enforce output schema conversion.
+
+ Args:
+ df: Spark DataFrame.
+ func: Generator function yielding RecordBatches.
+ schema: Output schema (Spark StructType or DDL string).
+ *args: Positional args passed to mapInArrow.
+ **kwargs: Keyword args passed to mapInArrow.
+
+ Returns:
+ Spark DataFrame with enforced schema.
+ """
  _require_pyspark("mapInArrow")

  spark_schema = convert(schema, T.StructType)
  arrow_schema = convert(schema, pa.Field)

  def patched(batches: Iterable[pa.RecordBatch]):
+ """Convert batches yielded by user function to the target schema.
+
+ Args:
+ batches: Input RecordBatch iterable.
+
+ Yields:
+ RecordBatch instances conforming to the output schema.
+ """
  for src in func(batches):
  yield convert(src, pa.RecordBatch, arrow_schema)
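
A usage sketch for the wrapper, assuming `df` is an existing Spark DataFrame with `id` and `value` columns; the batch function and output schema are illustrative, and the conversion of each yielded batch is handled by the `patched` generator shown above:

```python
from typing import Iterable, Iterator

import pyarrow as pa

from yggdrasil.libs.extensions.spark_extensions import checkMapInArrow


def add_flag(batches: Iterable[pa.RecordBatch]) -> Iterator[pa.RecordBatch]:
    for batch in batches:
        # Append a column; the yielded batches do not need to match the declared
        # output schema exactly, since checkMapInArrow converts each one.
        table = pa.Table.from_batches([batch]).append_column(
            "flag", pa.array([True] * batch.num_rows)
        )
        yield from table.to_batches()


out = checkMapInArrow(df, add_flag, "id long, value double, flag boolean")
```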
@@ -395,6 +491,18 @@ def checkMapInPandas(
  *args,
  **kwargs,
  ):
+ """Wrap mapInPandas to enforce output schema conversion.
+
+ Args:
+ df: Spark DataFrame.
+ func: Generator function yielding pandas DataFrames.
+ schema: Output schema (Spark StructType or DDL string).
+ *args: Positional args passed to mapInPandas.
+ **kwargs: Keyword args passed to mapInPandas.
+
+ Returns:
+ Spark DataFrame with enforced schema.
+ """
  _require_pyspark("mapInPandas")

  import pandas as _pd # local import so we don't shadow the ..pandas module
@@ -403,6 +511,14 @@ def checkMapInPandas(
  arrow_schema = convert(schema, pa.Field)

  def patched(batches: Iterable[_pd.DataFrame]):
+ """Convert pandas batches yielded by user function to the target schema.
+
+ Args:
+ batches: Input pandas DataFrame iterable.
+
+ Yields:
+ pandas DataFrames conforming to the output schema.
+ """
  for src in func(batches):
  yield convert(src, _pd.DataFrame, arrow_schema)
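
Both `checkMapInArrow` and `checkMapInPandas` rely on the same trick: wrap the user's generator in a `patched` generator that normalizes every yielded item before Spark sees it. A self-contained sketch of that pattern, with a plain cast function standing in for yggdrasil's `convert`:

```python
from typing import Callable, Iterable, Iterator, TypeVar

T = TypeVar("T")


def enforce_output(
    func: Callable[[Iterable[T]], Iterable[T]],
    cast: Callable[[T], T],
) -> Callable[[Iterable[T]], Iterator[T]]:
    # Return a generator function with the same shape as `func`, but every item
    # the user yields is passed through `cast` before being re-yielded.
    def patched(items: Iterable[T]) -> Iterator[T]:
        for src in func(items):
            yield cast(src)

    return patched


doubled = enforce_output(lambda xs: (x + x for x in xs), cast=float)
list(doubled([1, 2, 3]))  # [2.0, 4.0, 6.0]
```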
yggdrasil/libs/pandaslib.py

@@ -1,3 +1,5 @@
+ """Optional pandas dependency helpers."""
+
  try:
  import pandas # type: ignore
  pandas = pandas
@@ -6,6 +8,11 @@ except ImportError:


  def require_pandas():
+ """Ensure pandas is available before using pandas helpers.
+
+ Returns:
+ None.
+ """
  if pandas is None:
  raise ImportError(
  "pandas is required to use this function. "
yggdrasil/libs/polarslib.py

@@ -1,3 +1,5 @@
+ """Optional Polars dependency helpers."""
+
  try:
  import polars # type: ignore

@@ -13,6 +15,11 @@ __all__ = [


  def require_polars():
+ """Ensure polars is available before using polars helpers.
+
+ Returns:
+ None.
+ """
  if polars is None:
  raise ImportError(
  "polars is required to use this function. "

yggdrasil/libs/sparklib.py

@@ -1,3 +1,5 @@
+ """Optional Spark dependency helpers and Arrow/Spark type conversions."""
+
  from typing import Any

  import pyarrow as pa
@@ -51,18 +53,23 @@ except ImportError: # pragma: no cover - Spark not available
  pyspark = None

  class SparkSession:
+ """Fallback SparkSession placeholder when pyspark is unavailable."""

  @classmethod
  def getActiveSession(cls):
+ """Return None to indicate no active session is available."""
  return None

  class SparkDataFrame:
+ """Fallback DataFrame placeholder when pyspark is unavailable."""
  pass

  class SparkColumn:
+ """Fallback Column placeholder when pyspark is unavailable."""
  pass

  class SparkDataType:
+ """Fallback DataType placeholder when pyspark is unavailable."""
  pass

  ARROW_TO_SPARK = {}
@@ -91,6 +98,12 @@ __all__ = [
  def require_pyspark(active_session: bool = False):
  """
  Optionally enforce that pyspark (and an active SparkSession) exists.
+
+ Args:
+ active_session: Require an active SparkSession if True.
+
+ Returns:
+ None.
  """
  if pyspark is None:
  raise ImportError(
@@ -116,6 +129,13 @@ def arrow_type_to_spark_type(
  ) -> "T.DataType":
  """
  Convert a pyarrow.DataType to a pyspark.sql.types.DataType.
+
+ Args:
+ arrow_type: Arrow data type to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Spark SQL data type.
  """
  require_pyspark()

@@ -191,6 +211,13 @@ def arrow_field_to_spark_field(
  ) -> "T.StructField":
  """
  Convert a pyarrow.Field to a pyspark StructField.
+
+ Args:
+ field: Arrow field to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Spark StructField representation.
  """
  spark_type = arrow_type_to_spark_type(field.type, cast_options)

@@ -208,6 +235,13 @@ def spark_type_to_arrow_type(
  ) -> pa.DataType:
  """
  Convert a pyspark.sql.types.DataType to a pyarrow.DataType.
+
+ Args:
+ spark_type: Spark SQL data type to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Arrow data type.
  """
  require_pyspark()
  from pyspark.sql.types import (
@@ -287,6 +321,13 @@ def spark_field_to_arrow_field(
  ) -> pa.Field:
  """
  Convert a pyspark StructField to a pyarrow.Field.
+
+ Args:
+ field: Spark StructField to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Arrow field.
  """
  arrow_type = spark_type_to_arrow_type(field.dataType, cast_options)
yggdrasil/pyutils/__init__.py

@@ -1,4 +1,8 @@
+ """Python utility helpers for retries, parallelism, and environment management."""
+
  from .retry import retry
  from .parallel import parallelize
  from .python_env import PythonEnv
  from .callable_serde import CallableSerde
+
+ __all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]
yggdrasil/pyutils/callable_serde.py

@@ -1,3 +1,5 @@
+ """Callable serialization helpers for cross-process execution."""
+
  from __future__ import annotations

  import base64
@@ -26,6 +28,15 @@ _FLAG_COMPRESSED = 1


  def _resolve_attr_chain(mod: Any, qualname: str) -> Any:
+ """Resolve a dotted attribute path from a module.
+
+ Args:
+ mod: Module to traverse.
+ qualname: Dotted qualified name.
+
+ Returns:
+ Resolved attribute.
+ """
  obj = mod
  for part in qualname.split("."):
  obj = getattr(obj, part)
@@ -49,6 +60,14 @@ def _find_pkg_root_from_file(file_path: Path) -> Optional[Path]:


  def _callable_file_line(fn: Callable[..., Any]) -> Tuple[Optional[str], Optional[int]]:
+ """Return the source file path and line number for a callable.
+
+ Args:
+ fn: Callable to inspect.
+
+ Returns:
+ Tuple of (file path, line number).
+ """
  file = None
  line = None
  try:
@@ -85,6 +104,14 @@ def _referenced_global_names(fn: Callable[..., Any]) -> Set[str]:


  def _is_importable_reference(fn: Callable[..., Any]) -> bool:
+ """Return True when a callable can be imported by module and qualname.
+
+ Args:
+ fn: Callable to inspect.
+
+ Returns:
+ True if importable by module/qualname.
+ """
  mod_name = getattr(fn, "__module__", None)
  qualname = getattr(fn, "__qualname__", None)
  if not mod_name or not qualname:
@@ -245,6 +272,14 @@ class CallableSerde:

  @classmethod
  def from_callable(cls: type[T], x: Union[Callable[..., Any], T]) -> T:
+ """Create a CallableSerde from a callable or existing instance.
+
+ Args:
+ x: Callable or CallableSerde instance.
+
+ Returns:
+ CallableSerde instance.
+ """
  if isinstance(x, cls):
  return x

@@ -256,14 +291,29 @@ class CallableSerde:

  @property
  def module(self) -> Optional[str]:
+ """Return the callable's module name if available.
+
+ Returns:
+ Module name or None.
+ """
  return self._module or (getattr(self.fn, "__module__", None) if self.fn else None)

  @property
  def qualname(self) -> Optional[str]:
+ """Return the callable's qualified name if available.
+
+ Returns:
+ Qualified name or None.
+ """
  return self._qualname or (getattr(self.fn, "__qualname__", None) if self.fn else None)

  @property
  def file(self) -> Optional[str]:
+ """Return the filesystem path of the callable's source file.
+
+ Returns:
+ File path or None.
+ """
  if not self.fn:
  return None
  f, _ = _callable_file_line(self.fn)
@@ -271,6 +321,11 @@ class CallableSerde:

  @property
  def line(self) -> Optional[int]:
+ """Return the line number where the callable is defined.
+
+ Returns:
+ Line number or None.
+ """
  if not self.fn:
  return None
  _, ln = _callable_file_line(self.fn)
@@ -278,6 +333,11 @@ class CallableSerde:

  @property
  def pkg_root(self) -> Optional[str]:
+ """Return the inferred package root for the callable, if known.
+
+ Returns:
+ Package root path or None.
+ """
  if self._pkg_root:
  return self._pkg_root
  if not self.file:
@@ -287,6 +347,11 @@ class CallableSerde:

  @property
  def relpath_from_pkg_root(self) -> Optional[str]:
+ """Return the callable's path relative to the package root.
+
+ Returns:
+ Relative path or None.
+ """
  if not self.file or not self.pkg_root:
  return None
  try:
@@ -296,6 +361,11 @@ class CallableSerde:

  @property
  def importable(self) -> bool:
+ """Return True when the callable can be imported by reference.
+
+ Returns:
+ True if importable by module/qualname.
+ """
  if self.fn is None:
  return bool(self.module and self.qualname and "<locals>" not in (self.qualname or ""))
  return _is_importable_reference(self.fn)
@@ -309,6 +379,16 @@ class CallableSerde:
  dump_env: str = "none", # "none" | "globals" | "closure" | "both"
  filter_used_globals: bool = True,
  ) -> Dict[str, Any]:
+ """Serialize the callable into a dict for transport.
+
+ Args:
+ prefer: Preferred serialization kind.
+ dump_env: Environment payload selection.
+ filter_used_globals: Filter globals to referenced names.
+
+ Returns:
+ Serialized payload dict.
+ """
  kind = prefer
  if kind == "import" and not self.importable:
  kind = "dill"
@@ -352,6 +432,15 @@ class CallableSerde:

  @classmethod
  def load(cls: type[T], d: Dict[str, Any], *, add_pkg_root_to_syspath: bool = True) -> T:
+ """Construct a CallableSerde from a serialized dict payload.
+
+ Args:
+ d: Serialized payload dict.
+ add_pkg_root_to_syspath: Add package root to sys.path if True.
+
+ Returns:
+ CallableSerde instance.
+ """
  obj = cls(
  fn=None,
  _kind=d.get("kind", "auto"),
@@ -369,6 +458,14 @@ class CallableSerde:
  return obj # type: ignore[return-value]

  def materialize(self, *, add_pkg_root_to_syspath: bool = True) -> Callable[..., Any]:
+ """Resolve and return the underlying callable.
+
+ Args:
+ add_pkg_root_to_syspath: Add package root to sys.path if True.
+
+ Returns:
+ Resolved callable.
+ """
  if self.fn is not None:
  return self.fn

@@ -402,6 +499,15 @@ class CallableSerde:
  raise ValueError(f"Unknown kind: {kind}")

  def __call__(self, *args: Any, **kwargs: Any) -> Any:
+ """Invoke the materialized callable with the provided arguments.
+
+ Args:
+ *args: Positional args for the callable.
+ **kwargs: Keyword args for the callable.
+
+ Returns:
+ Callable return value.
+ """
  fn = self.materialize()
  return fn(*args, **kwargs)
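
Taken together, the documented methods describe a dump/load round trip: serialize a callable (by import reference when possible, otherwise via dill), ship the resulting dict elsewhere, and materialize it there. A sketch under that reading; the function being shipped is illustrative:

```python
from yggdrasil.pyutils import CallableSerde


def transform(x: int) -> int:
    return x * 2


# Sender side: prefer an import reference; dump() falls back to dill when the
# callable is not importable (e.g. lambdas or locally defined functions).
payload = CallableSerde.from_callable(transform).dump(prefer="import")

# Receiver side: rebuild the wrapper (optionally adding the package root to
# sys.path) and call it; __call__ materializes the callable lazily.
remote = CallableSerde.load(payload)
assert remote(21) == 42
```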
yggdrasil/pyutils/exceptions.py

@@ -1,3 +1,5 @@
+ """Utilities for parsing and re-raising exceptions from traceback strings."""
+
  import builtins
  import dataclasses as dc
  import re
@@ -26,6 +28,7 @@ _BARE_EXC_RE = re.compile(r"(?m)^\s*([A-Za-z_]\w*(?:Error|Exception|Warning|Inte

  @dc.dataclass(frozen=True)
  class ParsedException:
+ """Structured representation of a parsed exception type and message."""
  exc_type: Type[BaseException]
  message: str
  raw_type_name: str
@@ -34,10 +37,23 @@ class ParsedException:
  class RemoteTraceback(Exception):
  """Holds a traceback *string* and prints it as the chained cause."""
  def __init__(self, traceback_text: str):
+ """Store the traceback text for later display.
+
+ Args:
+ traceback_text: Traceback string to store.
+
+ Returns:
+ None.
+ """
  super().__init__("Remote traceback (text)")
  self.traceback_text = traceback_text

  def __str__(self) -> str:
+ """Render the exception with its stored traceback text.
+
+ Returns:
+ Rendered exception string with traceback text.
+ """
  return f"{self.args[0]}\n\n{self.traceback_text}"
yggdrasil/pyutils/modules.py

@@ -1,3 +1,5 @@
+ """Module dependency and pip index inspection utilities."""
+
  # modules.py
  from __future__ import annotations

@@ -43,6 +45,14 @@ MODULE_PROJECT_NAMES_ALIASES = {


  def module_name_to_project_name(module_name: str) -> str:
+ """Map module import names to PyPI project names when they differ.
+
+ Args:
+ module_name: Importable module name.
+
+ Returns:
+ PyPI project name.
+ """
  return MODULE_PROJECT_NAMES_ALIASES.get(module_name, module_name)


@@ -104,6 +114,7 @@ _REQ_NAME_RE = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")

  @dc.dataclass(frozen=True)
  class DependencyMetadata:
+ """Metadata describing an installed or missing dependency."""
  project: str
  requirement: str
  installed: bool
@@ -136,6 +147,14 @@ def _req_project_name(req_line: str) -> Optional[str]:


  def _distribution_for_module(mod: Union[str, ModuleType]):
+ """Resolve the importlib.metadata distribution that provides a module.
+
+ Args:
+ mod: Module name or module object.
+
+ Returns:
+ importlib.metadata.Distribution instance.
+ """
  if ilm is None:
  raise RuntimeError("importlib.metadata is not available")

@@ -213,6 +232,14 @@ def module_dependencies(lib: Union[str, ModuleType]) -> List[DependencyMetadata]


  def _run_pip(*args: str) -> Tuple[int, str, str]:
+ """Run pip with arguments and return (returncode, stdout, stderr).
+
+ Args:
+ *args: Pip arguments.
+
+ Returns:
+ Tuple of (returncode, stdout, stderr).
+ """
  p = subprocess.run(
  [sys.executable, "-m", "pip", *args],
  text=True,
@@ -225,21 +252,37 @@ def _run_pip(*args: str) -> Tuple[int, str, str]:

  @dc.dataclass(frozen=True)
  class PipIndexSettings:
+ """Resolved pip index configuration from env and config sources."""
  index_url: Optional[str] = None
  extra_index_urls: List[str] = dc.field(default_factory=list)
  sources: Dict[str, Dict[str, Any]] = dc.field(default_factory=dict) # {"env": {...}, "config": {...}}

  @classmethod
  def default_settings(cls):
+ """Return the cached default pip index settings.
+
+ Returns:
+ Default PipIndexSettings instance.
+ """
  return DEFAULT_PIP_INDEX_SETTINGS

  @property
  def extra_index_url(self):
+ """Return extra index URLs as a space-separated string.
+
+ Returns:
+ Space-separated extra index URLs or None.
+ """
  if self.extra_index_urls:
  return " ".join(self.extra_index_urls)
  return None

  def as_dict(self) -> dict:
+ """Return a dict representation of the settings.
+
+ Returns:
+ Dict representation of settings.
+ """
  return dc.asdict(self)


@@ -325,4 +368,4 @@ def get_pip_index_settings() -> PipIndexSettings:
  try:
  DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
  except:
- DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
+ DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
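
A usage sketch for the pip-index helpers documented in this file. Building the flag list from the resolved settings is illustrative; only the attributes and functions shown in the diff are used:

```python
from yggdrasil.pyutils.modules import (
    PipIndexSettings,
    module_dependencies,
    module_name_to_project_name,
)

settings = PipIndexSettings.default_settings()  # cached get_pip_index_settings() result

# Translate an import name to its PyPI project name, then assemble pip arguments
# that honour any private index configuration that was resolved.
pip_args = ["install", module_name_to_project_name("yaml")]
if settings.index_url:
    pip_args += ["--index-url", settings.index_url]
for url in settings.extra_index_urls:
    pip_args += ["--extra-index-url", url]

# module_dependencies returns DependencyMetadata entries (project, requirement, installed).
deps = module_dependencies("yggdrasil")
```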