ygg 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/METADATA +1 -1
- ygg-0.1.33.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +244 -3
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +24 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +29 -4
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +147 -0
- yggdrasil/databricks/sql/types.py +33 -1
- yggdrasil/databricks/workspaces/__init__.py +2 -1
- yggdrasil/databricks/workspaces/filesytem.py +183 -0
- yggdrasil/databricks/workspaces/io.py +387 -9
- yggdrasil/databricks/workspaces/path.py +297 -2
- yggdrasil/databricks/workspaces/path_kind.py +3 -0
- yggdrasil/databricks/workspaces/workspace.py +202 -5
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +123 -1
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.31.dist-info/RECORD +0 -59
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/WHEEL +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,5 @@
+"""Spark DataFrame extension helpers for aliases and resampling."""
+
 import datetime
 import inspect
 import re
@@ -30,6 +32,15 @@ _COL_RE = re.compile(r"Column<\s*['\"]?`?(.+?)`?['\"]?\s*>")
 
 
 def _require_pyspark(fn_name: str) -> None:
+    """Raise when PySpark is unavailable for a requested helper."""
+    """Raise when PySpark is unavailable for a requested helper.
+
+    Args:
+        fn_name: Name of the calling function.
+
+    Returns:
+        None.
+    """
     if pyspark is None or F is None or T is None:
         raise RuntimeError(
             f"{fn_name} requires PySpark to be available. "
@@ -41,6 +52,15 @@ def getAliases(
     obj: Union[SparkDataFrame, SparkColumn, str, Iterable[Union[SparkDataFrame, SparkColumn, str]]],
     full: bool = True,
 ) -> list[str]:
+    """Return aliases for Spark columns/dataframes or collections.
+
+    Args:
+        obj: Spark DataFrame/Column, string, or iterable of these.
+        full: Whether to return full qualified names.
+
+    Returns:
+        List of alias strings.
+    """
     if obj is None:
         return []
 
@@ -92,6 +112,16 @@ def latest(
     partitionBy: List[Union[str, SparkColumn]],
     orderBy: List[Union[str, SparkColumn]],
 ) -> SparkDataFrame:
+    """Return the latest rows per partition based on ordering.
+
+    Args:
+        df: Spark DataFrame.
+        partitionBy: Columns to partition by.
+        orderBy: Columns to order by.
+
+    Returns:
+        Spark DataFrame with latest rows per partition.
+    """
     _require_pyspark("latest")
 
     partition_col_names = getAliases(partitionBy)
@@ -123,12 +153,30 @@ def _infer_time_col_spark(df: "pyspark.sql.DataFrame") -> str:
 
 
 def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Filter kwargs to only those accepted by the callable.
+
+    Args:
+        fn: Callable to inspect.
+        kwargs: Candidate keyword arguments.
+
+    Returns:
+        Filtered keyword arguments.
+    """
     sig = inspect.signature(fn)  # type: ignore[arg-type]
     allowed = set(sig.parameters.keys())
     return {k: v for k, v in kwargs.items() if (k in allowed and v is not None)}
 
 
 def _append_drop_col_to_spark_schema(schema: "T.StructType", drop_col: str) -> "T.StructType":
+    """Ensure the drop column exists in the Spark schema.
+
+    Args:
+        schema: Spark schema to augment.
+        drop_col: Column name to add if missing.
+
+    Returns:
+        Updated Spark schema.
+    """
     _require_pyspark("_append_drop_col_to_spark_schema")
     if drop_col in schema.fieldNames():
         return schema
@@ -169,6 +217,14 @@ def upsample(
     spark_schema = arrow_field_to_spark_field(options.target_field)
 
     def within_group(tb: pa.Table) -> pa.Table:
+        """Apply upsample logic to a grouped Arrow table.
+
+        Args:
+            tb: Arrow table for a grouped partition.
+
+        Returns:
+            Arrow table with upsampled data.
+        """
         res = (
             arrow_table_to_polars_dataframe(tb, options)
             .sort(time_col_name)
@@ -277,6 +333,14 @@ def resample(
     out_options = CastOptions.check_arg(out_arrow_field)
 
     def within_group(tb: pa.Table) -> pa.Table:
+        """Apply resample logic to a grouped Arrow table.
+
+        Args:
+            tb: Arrow table for a grouped partition.
+
+        Returns:
+            Arrow table with resampled data.
+        """
         from .polars_extensions import resample
 
         pdf = arrow_table_to_polars_dataframe(tb, in_options)
@@ -329,6 +393,18 @@ def checkJoin(
     *args,
     **kwargs,
 ):
+    """Join two DataFrames with schema-aware column casting.
+
+    Args:
+        df: Left Spark DataFrame.
+        other: Right Spark DataFrame.
+        on: Join keys or mapping.
+        *args: Positional args passed to join.
+        **kwargs: Keyword args passed to join.
+
+    Returns:
+        Joined Spark DataFrame.
+    """
     _require_pyspark("checkJoin")
 
     other = convert(other, SparkDataFrame)
@@ -371,12 +447,32 @@ def checkMapInArrow(
     *args,
     **kwargs,
 ):
+    """Wrap mapInArrow to enforce output schema conversion.
+
+    Args:
+        df: Spark DataFrame.
+        func: Generator function yielding RecordBatches.
+        schema: Output schema (Spark StructType or DDL string).
+        *args: Positional args passed to mapInArrow.
+        **kwargs: Keyword args passed to mapInArrow.
+
+    Returns:
+        Spark DataFrame with enforced schema.
+    """
     _require_pyspark("mapInArrow")
 
     spark_schema = convert(schema, T.StructType)
     arrow_schema = convert(schema, pa.Field)
 
     def patched(batches: Iterable[pa.RecordBatch]):
+        """Convert batches yielded by user function to the target schema.
+
+        Args:
+            batches: Input RecordBatch iterable.
+
+        Yields:
+            RecordBatch instances conforming to the output schema.
+        """
         for src in func(batches):
             yield convert(src, pa.RecordBatch, arrow_schema)
 
@@ -395,6 +491,18 @@ def checkMapInPandas(
     *args,
     **kwargs,
 ):
+    """Wrap mapInPandas to enforce output schema conversion.
+
+    Args:
+        df: Spark DataFrame.
+        func: Generator function yielding pandas DataFrames.
+        schema: Output schema (Spark StructType or DDL string).
+        *args: Positional args passed to mapInPandas.
+        **kwargs: Keyword args passed to mapInPandas.
+
+    Returns:
+        Spark DataFrame with enforced schema.
+    """
     _require_pyspark("mapInPandas")
 
     import pandas as _pd  # local import so we don't shadow the ..pandas module
@@ -403,6 +511,14 @@ def checkMapInPandas(
     arrow_schema = convert(schema, pa.Field)
 
     def patched(batches: Iterable[_pd.DataFrame]):
+        """Convert pandas batches yielded by user function to the target schema.
+
+        Args:
+            batches: Input pandas DataFrame iterable.
+
+        Yields:
+            pandas DataFrames conforming to the output schema.
+        """
         for src in func(batches):
             yield convert(src, _pd.DataFrame, arrow_schema)
 
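The checkMapInArrow and checkMapInPandas wrappers above cast whatever batches the user function yields into the declared output schema before handing them back to Spark. A minimal usage sketch, assuming the helpers are imported directly from yggdrasil.libs.extensions.spark_extensions and that a local SparkSession is available (neither appears in the diff itself):

    import pyarrow as pa
    from pyspark.sql import SparkSession
    from yggdrasil.libs.extensions.spark_extensions import checkMapInArrow  # assumed import path

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([(1, 2.0), (2, 4.0)], "id long, value double")

    def add_one(batches):
        # Yield Arrow RecordBatches in whatever shape is convenient;
        # the wrapper converts each one to the declared output schema.
        for batch in batches:
            data = batch.to_pydict()
            data["value"] = [v + 1 for v in data["value"]]
            yield pa.RecordBatch.from_pydict(data)

    out = checkMapInArrow(df, add_one, "id long, value double")  # DDL string schema, per the docstring
    out.show()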
yggdrasil/libs/pandaslib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional pandas dependency helpers."""
+
 try:
     import pandas  # type: ignore
     pandas = pandas
@@ -6,6 +8,11 @@ except ImportError:
 
 
 def require_pandas():
+    """Ensure pandas is available before using pandas helpers.
+
+    Returns:
+        None.
+    """
     if pandas is None:
         raise ImportError(
             "pandas is required to use this function. "
yggdrasil/libs/polarslib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional Polars dependency helpers."""
+
 try:
     import polars  # type: ignore
 
@@ -13,6 +15,11 @@ __all__ = [
 
 
 def require_polars():
+    """Ensure polars is available before using polars helpers.
+
+    Returns:
+        None.
+    """
     if polars is None:
         raise ImportError(
             "polars is required to use this function. "
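Both modules document the same optional-dependency guard: the library is imported at module load when present, and require_pandas()/require_polars() raise a descriptive ImportError at call time otherwise. An illustrative call site, not part of the diff, assuming the module-level pandas attribute is exported as the pandaslib hunk suggests:

    from yggdrasil.libs.pandaslib import pandas, require_pandas  # assumed re-export

    def rows_to_frame(rows):
        # Fail fast with a clear ImportError when pandas is missing,
        # rather than an AttributeError on a None placeholder.
        require_pandas()
        return pandas.DataFrame(rows)

    print(rows_to_frame([{"a": 1}, {"a": 2}]))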
yggdrasil/libs/sparklib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional Spark dependency helpers and Arrow/Spark type conversions."""
+
 from typing import Any
 
 import pyarrow as pa
@@ -51,18 +53,23 @@ except ImportError:  # pragma: no cover - Spark not available
     pyspark = None
 
     class SparkSession:
+        """Fallback SparkSession placeholder when pyspark is unavailable."""
 
         @classmethod
         def getActiveSession(cls):
+            """Return None to indicate no active session is available."""
             return None
 
     class SparkDataFrame:
+        """Fallback DataFrame placeholder when pyspark is unavailable."""
         pass
 
     class SparkColumn:
+        """Fallback Column placeholder when pyspark is unavailable."""
         pass
 
     class SparkDataType:
+        """Fallback DataType placeholder when pyspark is unavailable."""
         pass
 
 ARROW_TO_SPARK = {}
@@ -91,6 +98,12 @@ __all__ = [
 def require_pyspark(active_session: bool = False):
     """
     Optionally enforce that pyspark (and an active SparkSession) exists.
+
+    Args:
+        active_session: Require an active SparkSession if True.
+
+    Returns:
+        None.
     """
     if pyspark is None:
         raise ImportError(
@@ -116,6 +129,13 @@ def arrow_type_to_spark_type(
 ) -> "T.DataType":
     """
     Convert a pyarrow.DataType to a pyspark.sql.types.DataType.
+
+    Args:
+        arrow_type: Arrow data type to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Spark SQL data type.
     """
     require_pyspark()
 
@@ -191,6 +211,13 @@ def arrow_field_to_spark_field(
 ) -> "T.StructField":
     """
     Convert a pyarrow.Field to a pyspark StructField.
+
+    Args:
+        field: Arrow field to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Spark StructField representation.
     """
     spark_type = arrow_type_to_spark_type(field.type, cast_options)
 
@@ -208,6 +235,13 @@ def spark_type_to_arrow_type(
 ) -> pa.DataType:
     """
     Convert a pyspark.sql.types.DataType to a pyarrow.DataType.
+
+    Args:
+        spark_type: Spark SQL data type to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Arrow data type.
     """
     require_pyspark()
     from pyspark.sql.types import (
@@ -287,6 +321,13 @@ def spark_field_to_arrow_field(
 ) -> pa.Field:
     """
     Convert a pyspark StructField to a pyarrow.Field.
+
+    Args:
+        field: Spark StructField to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Arrow field.
     """
     arrow_type = spark_type_to_arrow_type(field.dataType, cast_options)
 
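The converters documented above map fields between the Arrow and Spark type systems in both directions. A round-trip sketch, assuming PySpark is installed and that cast_options may be omitted (the upsample hunk earlier calls arrow_field_to_spark_field with a single argument; the reverse direction is assumed to behave the same way):

    import pyarrow as pa
    from yggdrasil.libs.sparklib import (
        arrow_field_to_spark_field,
        spark_field_to_arrow_field,
    )

    arrow_field = pa.field("ts", pa.timestamp("us"), nullable=True)

    # Arrow -> Spark: yields a pyspark.sql.types.StructField per the docstring.
    spark_field = arrow_field_to_spark_field(arrow_field)
    print(spark_field)

    # Spark -> Arrow: back to a pyarrow.Field (cast_options assumed optional here too).
    print(spark_field_to_arrow_field(spark_field))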
yggdrasil/pyutils/__init__.py
CHANGED
@@ -1,4 +1,8 @@
+"""Python utility helpers for retries, parallelism, and environment management."""
+
 from .retry import retry
 from .parallel import parallelize
 from .python_env import PythonEnv
 from .callable_serde import CallableSerde
+
+__all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]
yggdrasil/pyutils/callable_serde.py
CHANGED

@@ -1,3 +1,5 @@
+"""Callable serialization helpers for cross-process execution."""
+
 from __future__ import annotations
 
 import base64
@@ -26,6 +28,15 @@ _FLAG_COMPRESSED = 1
 
 
 def _resolve_attr_chain(mod: Any, qualname: str) -> Any:
+    """Resolve a dotted attribute path from a module.
+
+    Args:
+        mod: Module to traverse.
+        qualname: Dotted qualified name.
+
+    Returns:
+        Resolved attribute.
+    """
     obj = mod
     for part in qualname.split("."):
         obj = getattr(obj, part)
@@ -49,6 +60,14 @@ def _find_pkg_root_from_file(file_path: Path) -> Optional[Path]:
 
 
 def _callable_file_line(fn: Callable[..., Any]) -> Tuple[Optional[str], Optional[int]]:
+    """Return the source file path and line number for a callable.
+
+    Args:
+        fn: Callable to inspect.
+
+    Returns:
+        Tuple of (file path, line number).
+    """
     file = None
     line = None
     try:
@@ -85,6 +104,14 @@ def _referenced_global_names(fn: Callable[..., Any]) -> Set[str]:
 
 
 def _is_importable_reference(fn: Callable[..., Any]) -> bool:
+    """Return True when a callable can be imported by module and qualname.
+
+    Args:
+        fn: Callable to inspect.
+
+    Returns:
+        True if importable by module/qualname.
+    """
     mod_name = getattr(fn, "__module__", None)
     qualname = getattr(fn, "__qualname__", None)
     if not mod_name or not qualname:
@@ -245,6 +272,14 @@ class CallableSerde:
 
     @classmethod
     def from_callable(cls: type[T], x: Union[Callable[..., Any], T]) -> T:
+        """Create a CallableSerde from a callable or existing instance.
+
+        Args:
+            x: Callable or CallableSerde instance.
+
+        Returns:
+            CallableSerde instance.
+        """
         if isinstance(x, cls):
             return x
 
@@ -256,14 +291,29 @@ class CallableSerde:
 
     @property
     def module(self) -> Optional[str]:
+        """Return the callable's module name if available.
+
+        Returns:
+            Module name or None.
+        """
         return self._module or (getattr(self.fn, "__module__", None) if self.fn else None)
 
     @property
     def qualname(self) -> Optional[str]:
+        """Return the callable's qualified name if available.
+
+        Returns:
+            Qualified name or None.
+        """
         return self._qualname or (getattr(self.fn, "__qualname__", None) if self.fn else None)
 
     @property
     def file(self) -> Optional[str]:
+        """Return the filesystem path of the callable's source file.
+
+        Returns:
+            File path or None.
+        """
         if not self.fn:
             return None
         f, _ = _callable_file_line(self.fn)
@@ -271,6 +321,11 @@ class CallableSerde:
 
     @property
     def line(self) -> Optional[int]:
+        """Return the line number where the callable is defined.
+
+        Returns:
+            Line number or None.
+        """
         if not self.fn:
             return None
         _, ln = _callable_file_line(self.fn)
@@ -278,6 +333,11 @@ class CallableSerde:
 
     @property
     def pkg_root(self) -> Optional[str]:
+        """Return the inferred package root for the callable, if known.
+
+        Returns:
+            Package root path or None.
+        """
         if self._pkg_root:
             return self._pkg_root
         if not self.file:
@@ -287,6 +347,11 @@ class CallableSerde:
 
     @property
     def relpath_from_pkg_root(self) -> Optional[str]:
+        """Return the callable's path relative to the package root.
+
+        Returns:
+            Relative path or None.
+        """
         if not self.file or not self.pkg_root:
             return None
         try:
@@ -296,6 +361,11 @@ class CallableSerde:
 
     @property
     def importable(self) -> bool:
+        """Return True when the callable can be imported by reference.
+
+        Returns:
+            True if importable by module/qualname.
+        """
         if self.fn is None:
             return bool(self.module and self.qualname and "<locals>" not in (self.qualname or ""))
         return _is_importable_reference(self.fn)
@@ -309,6 +379,16 @@ class CallableSerde:
         dump_env: str = "none",  # "none" | "globals" | "closure" | "both"
         filter_used_globals: bool = True,
     ) -> Dict[str, Any]:
+        """Serialize the callable into a dict for transport.
+
+        Args:
+            prefer: Preferred serialization kind.
+            dump_env: Environment payload selection.
+            filter_used_globals: Filter globals to referenced names.
+
+        Returns:
+            Serialized payload dict.
+        """
         kind = prefer
         if kind == "import" and not self.importable:
             kind = "dill"
@@ -352,6 +432,15 @@ class CallableSerde:
 
     @classmethod
     def load(cls: type[T], d: Dict[str, Any], *, add_pkg_root_to_syspath: bool = True) -> T:
+        """Construct a CallableSerde from a serialized dict payload.
+
+        Args:
+            d: Serialized payload dict.
+            add_pkg_root_to_syspath: Add package root to sys.path if True.
+
+        Returns:
+            CallableSerde instance.
+        """
         obj = cls(
             fn=None,
             _kind=d.get("kind", "auto"),
@@ -369,6 +458,14 @@ class CallableSerde:
         return obj  # type: ignore[return-value]
 
     def materialize(self, *, add_pkg_root_to_syspath: bool = True) -> Callable[..., Any]:
+        """Resolve and return the underlying callable.
+
+        Args:
+            add_pkg_root_to_syspath: Add package root to sys.path if True.
+
+        Returns:
+            Resolved callable.
+        """
         if self.fn is not None:
             return self.fn
 
@@ -402,6 +499,15 @@ class CallableSerde:
             raise ValueError(f"Unknown kind: {kind}")
 
     def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        """Invoke the materialized callable with the provided arguments.
+
+        Args:
+            *args: Positional args for the callable.
+            **kwargs: Keyword args for the callable.
+
+        Returns:
+            Callable return value.
+        """
         fn = self.materialize()
         return fn(*args, **kwargs)
 
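Taken together, dump, load, and materialize round-trip a callable either by import reference or by a serialized payload. A sketch of that round trip; payload keys other than "kind" and the exact dill fallback behaviour are assumptions beyond what these hunks show:

    from yggdrasil.pyutils import CallableSerde

    def greet(name: str) -> str:
        return f"hello {name}"

    serde = CallableSerde.from_callable(greet)
    payload = serde.dump(prefer="import")   # falls back to "dill" when not importable, per the diff
    restored = CallableSerde.load(payload)  # add_pkg_root_to_syspath=True by default
    print(restored.materialize()("world"))  # or simply restored("world") via __call__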
yggdrasil/pyutils/exceptions.py
CHANGED
@@ -1,3 +1,5 @@
+"""Utilities for parsing and re-raising exceptions from traceback strings."""
+
 import builtins
 import dataclasses as dc
 import re
@@ -26,6 +28,7 @@ _BARE_EXC_RE = re.compile(r"(?m)^\s*([A-Za-z_]\w*(?:Error|Exception|Warning|Inte
 
 @dc.dataclass(frozen=True)
 class ParsedException:
+    """Structured representation of a parsed exception type and message."""
     exc_type: Type[BaseException]
     message: str
     raw_type_name: str
@@ -34,10 +37,23 @@
 class RemoteTraceback(Exception):
     """Holds a traceback *string* and prints it as the chained cause."""
     def __init__(self, traceback_text: str):
+        """Store the traceback text for later display.
+
+        Args:
+            traceback_text: Traceback string to store.
+
+        Returns:
+            None.
+        """
         super().__init__("Remote traceback (text)")
         self.traceback_text = traceback_text
 
     def __str__(self) -> str:
+        """Render the exception with its stored traceback text.
+
+        Returns:
+            Rendered exception string with traceback text.
+        """
         return f"{self.args[0]}\n\n{self.traceback_text}"
 
 
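RemoteTraceback stores a remote traceback as text and renders it through __str__, so it can be attached as the __cause__ of an exception re-raised locally. A hand-rolled chaining sketch; the package's own re-raise helper, if any, is not shown in these hunks:

    from yggdrasil.pyutils.exceptions import RemoteTraceback

    remote_tb_text = (
        "Traceback (most recent call last):\n"
        '  File "job.py", line 10, in <module>\n'
        "    run()\n"
        "ValueError: bad input\n"
    )

    try:
        # Re-raise locally while keeping the remote traceback text as the cause.
        raise ValueError("bad input") from RemoteTraceback(remote_tb_text)
    except ValueError as exc:
        print(exc.__cause__)  # prints "Remote traceback (text)" followed by the stored text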
yggdrasil/pyutils/modules.py
CHANGED
@@ -1,3 +1,5 @@
+"""Module dependency and pip index inspection utilities."""
+
 # modules.py
 from __future__ import annotations
 
@@ -43,6 +45,14 @@ MODULE_PROJECT_NAMES_ALIASES = {
 
 
 def module_name_to_project_name(module_name: str) -> str:
+    """Map module import names to PyPI project names when they differ.
+
+    Args:
+        module_name: Importable module name.
+
+    Returns:
+        PyPI project name.
+    """
     return MODULE_PROJECT_NAMES_ALIASES.get(module_name, module_name)
 
 
@@ -104,6 +114,7 @@ _REQ_NAME_RE = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")
 
 @dc.dataclass(frozen=True)
 class DependencyMetadata:
+    """Metadata describing an installed or missing dependency."""
     project: str
     requirement: str
     installed: bool
@@ -136,6 +147,14 @@ def _req_project_name(req_line: str) -> Optional[str]:
 
 
 def _distribution_for_module(mod: Union[str, ModuleType]):
+    """Resolve the importlib.metadata distribution that provides a module.
+
+    Args:
+        mod: Module name or module object.
+
+    Returns:
+        importlib.metadata.Distribution instance.
+    """
     if ilm is None:
         raise RuntimeError("importlib.metadata is not available")
 
@@ -213,6 +232,14 @@ def module_dependencies(lib: Union[str, ModuleType]) -> List[DependencyMetadata]
 
 
 def _run_pip(*args: str) -> Tuple[int, str, str]:
+    """Run pip with arguments and return (returncode, stdout, stderr).
+
+    Args:
+        *args: Pip arguments.
+
+    Returns:
+        Tuple of (returncode, stdout, stderr).
+    """
     p = subprocess.run(
         [sys.executable, "-m", "pip", *args],
         text=True,
@@ -225,21 +252,37 @@ def _run_pip(*args: str) -> Tuple[int, str, str]:
 
 @dc.dataclass(frozen=True)
 class PipIndexSettings:
+    """Resolved pip index configuration from env and config sources."""
     index_url: Optional[str] = None
     extra_index_urls: List[str] = dc.field(default_factory=list)
     sources: Dict[str, Dict[str, Any]] = dc.field(default_factory=dict)  # {"env": {...}, "config": {...}}
 
     @classmethod
     def default_settings(cls):
+        """Return the cached default pip index settings.
+
+        Returns:
+            Default PipIndexSettings instance.
+        """
         return DEFAULT_PIP_INDEX_SETTINGS
 
     @property
     def extra_index_url(self):
+        """Return extra index URLs as a space-separated string.
+
+        Returns:
+            Space-separated extra index URLs or None.
+        """
         if self.extra_index_urls:
            return " ".join(self.extra_index_urls)
        return None
 
     def as_dict(self) -> dict:
+        """Return a dict representation of the settings.
+
+        Returns:
+            Dict representation of settings.
+        """
         return dc.asdict(self)
 
 
@@ -325,4 +368,4 @@ def get_pip_index_settings() -> PipIndexSettings:
 try:
     DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
 except:
-    DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
+    DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
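These helpers let a caller inspect how the current interpreter resolves project names and pip indexes. A small inspection sketch using only the names visible in the hunks above; outputs depend entirely on the local environment and pip configuration:

    from yggdrasil.pyutils.modules import (
        PipIndexSettings,
        get_pip_index_settings,
        module_name_to_project_name,
    )

    # Returns the name unchanged unless it appears in MODULE_PROJECT_NAMES_ALIASES.
    print(module_name_to_project_name("pyspark"))

    settings = get_pip_index_settings()  # resolved from env and config sources
    print(settings.index_url, settings.extra_index_url)
    print(PipIndexSettings.default_settings().as_dict())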