tangle-cli 0.0.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tangle_cli/__init__.py +19 -0
- tangle_cli/api_cli.py +787 -0
- tangle_cli/api_schema.py +633 -0
- tangle_cli/api_transport.py +461 -0
- tangle_cli/args_container.py +244 -0
- tangle_cli/artifacts.py +293 -0
- tangle_cli/artifacts_cli.py +108 -0
- tangle_cli/cli.py +57 -0
- tangle_cli/cli_helpers.py +116 -0
- tangle_cli/cli_options.py +52 -0
- tangle_cli/client.py +677 -0
- tangle_cli/component_from_func.py +1856 -0
- tangle_cli/component_generator.py +298 -0
- tangle_cli/component_inspector.py +494 -0
- tangle_cli/component_publisher.py +921 -0
- tangle_cli/components_cli.py +269 -0
- tangle_cli/dynamic_discovery_client.py +296 -0
- tangle_cli/generated_model_extensions.py +405 -0
- tangle_cli/generated_runtime.py +43 -0
- tangle_cli/handler.py +96 -0
- tangle_cli/hydration_trust.py +222 -0
- tangle_cli/logger.py +166 -0
- tangle_cli/models.py +407 -0
- tangle_cli/module_bundler.py +662 -0
- tangle_cli/openapi/__init__.py +0 -0
- tangle_cli/openapi/codegen.py +1090 -0
- tangle_cli/openapi/parser.py +77 -0
- tangle_cli/pipeline_dehydrator.py +720 -0
- tangle_cli/pipeline_hydrator.py +1785 -0
- tangle_cli/pipeline_run_annotations.py +41 -0
- tangle_cli/pipeline_run_details.py +203 -0
- tangle_cli/pipeline_run_manager.py +1994 -0
- tangle_cli/pipeline_run_search.py +712 -0
- tangle_cli/pipeline_runner.py +620 -0
- tangle_cli/pipeline_runs_cli.py +584 -0
- tangle_cli/pipelines.py +581 -0
- tangle_cli/pipelines_cli.py +271 -0
- tangle_cli/published_components_cli.py +373 -0
- tangle_cli/py.typed +0 -0
- tangle_cli/quickstart.py +110 -0
- tangle_cli/secrets.py +156 -0
- tangle_cli/secrets_cli.py +269 -0
- tangle_cli/utils.py +942 -0
- tangle_cli/version_manager.py +470 -0
- tangle_cli-0.0.1a1.dist-info/METADATA +561 -0
- tangle_cli-0.0.1a1.dist-info/RECORD +48 -0
- tangle_cli-0.0.1a1.dist-info/WHEEL +4 -0
- tangle_cli-0.0.1a1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
"""Discovers local Python modules, bundles their source, and generates injection code.
|
|
2
|
+
|
|
3
|
+
Provides ``ModuleBundler`` for embedding local dependency modules into generated
|
|
4
|
+
components so they are available at runtime without requiring the original
|
|
5
|
+
package to be installed in the container.
|
|
6
|
+
|
|
7
|
+
Also contains ``classify_imports`` — the import classification utility used by
|
|
8
|
+
both the component generator and the airflow converter.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import ast
|
|
12
|
+
import base64
|
|
13
|
+
import importlib.util
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import sys
|
|
18
|
+
import textwrap
|
|
19
|
+
from collections.abc import Iterator
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Literal
|
|
22
|
+
|
|
23
|
+
# Paths that indicate a module is installed (not local project code).
|
|
24
|
+
# Matched as plain substrings of a module's resolved path: any path containing
# one of these markers is treated as an installed distribution, not project code.
_INSTALLED_PACKAGE_MARKERS = ("site-packages", "dist-packages")
|
|
25
|
+
|
|
26
|
+
# =============================================================================
|
|
27
|
+
# Import classification
|
|
28
|
+
# =============================================================================
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def classify_imports(
|
|
32
|
+
file_path: Path,
|
|
33
|
+
pip_deps: list[str] | None = None,
|
|
34
|
+
resolve_root: Path | None = None,
|
|
35
|
+
source: str | None = None,
|
|
36
|
+
) -> dict[str, Literal["stdlib", "third_party", "local"]]:
|
|
37
|
+
"""Classify imports in a Python file as stdlib, third-party, or local.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
file_path: Path to the Python source file
|
|
41
|
+
pip_deps: List of pip dependency strings (e.g., ["pandas==2.0", "requests>=2.28"])
|
|
42
|
+
resolve_root: Directory to check for local modules. Defaults to file_path.parent.
|
|
43
|
+
Use this when imports resolve relative to a different root (e.g., dags_root
|
|
44
|
+
for Airflow DAG files).
|
|
45
|
+
source: Pre-read source text. If provided, the file is not read again.
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
Dict mapping module names to their classification.
|
|
49
|
+
"""
|
|
50
|
+
if source is None:
|
|
51
|
+
source = file_path.read_text()
|
|
52
|
+
tree = ast.parse(source)
|
|
53
|
+
|
|
54
|
+
# Extract top-level module names from pip deps
|
|
55
|
+
third_party_names: set[str] = set()
|
|
56
|
+
if pip_deps:
|
|
57
|
+
for dep in pip_deps:
|
|
58
|
+
# Extract package name from dependency spec like "pandas==2.0.0" or "requests>=2.28"
|
|
59
|
+
name = re.split(r'[><=!~\[]', dep)[0].strip().lower()
|
|
60
|
+
# Normalize: pip package names use hyphens, import names use underscores
|
|
61
|
+
third_party_names.add(name.replace("-", "_"))
|
|
62
|
+
|
|
63
|
+
# Get stdlib module names
|
|
64
|
+
if hasattr(sys, "stdlib_module_names"):
|
|
65
|
+
stdlib_names: frozenset[str] | set[str] = sys.stdlib_module_names
|
|
66
|
+
else:
|
|
67
|
+
stdlib_names = set(sys.builtin_module_names)
|
|
68
|
+
|
|
69
|
+
result: dict[str, Literal["stdlib", "third_party", "local"]] = {}
|
|
70
|
+
file_dir = resolve_root or file_path.parent
|
|
71
|
+
|
|
72
|
+
for node in ast.walk(tree):
|
|
73
|
+
if isinstance(node, ast.Import):
|
|
74
|
+
for alias in node.names:
|
|
75
|
+
mod_name = alias.name.split(".")[0]
|
|
76
|
+
result[mod_name] = _classify_module(mod_name, stdlib_names, third_party_names, file_dir)
|
|
77
|
+
elif isinstance(node, ast.ImportFrom):
|
|
78
|
+
if node.module and node.level == 0:
|
|
79
|
+
mod_name = node.module.split(".")[0]
|
|
80
|
+
result[mod_name] = _classify_module(mod_name, stdlib_names, third_party_names, file_dir)
|
|
81
|
+
elif node.level > 0:
|
|
82
|
+
# Relative imports are always local
|
|
83
|
+
if node.module:
|
|
84
|
+
result[node.module.split(".")[0]] = "local"
|
|
85
|
+
elif node.names:
|
|
86
|
+
# `from . import helpers` — module is None, names has the imports
|
|
87
|
+
for alias in node.names:
|
|
88
|
+
result[alias.name.split(".")[0]] = "local"
|
|
89
|
+
|
|
90
|
+
return result
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _classify_module(
|
|
94
|
+
mod_name: str,
|
|
95
|
+
stdlib_names: frozenset[str] | set[str],
|
|
96
|
+
third_party_names: set[str],
|
|
97
|
+
file_dir: Path,
|
|
98
|
+
) -> Literal["stdlib", "third_party", "local"]:
|
|
99
|
+
"""Classify a single module name.
|
|
100
|
+
|
|
101
|
+
Uses a two-pass approach:
|
|
102
|
+
|
|
103
|
+
1. **Filesystem check** — looks for ``<mod_name>.py`` or
|
|
104
|
+
``<mod_name>/__init__.py`` directly under *file_dir*.
|
|
105
|
+
2. **importlib fallback** — uses ``importlib.util.find_spec`` to locate the
|
|
106
|
+
module on ``sys.path``. If the resolved origin is *not* inside
|
|
107
|
+
``site-packages`` or ``dist-packages`` it is treated as a local module.
|
|
108
|
+
This handles project layouts where local modules live in sibling
|
|
109
|
+
directories (e.g. ``src/utils`` next to ``src/components``).
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
mod_name: Top-level module name (e.g., "local_modules")
|
|
113
|
+
stdlib_names: Set of standard library module names
|
|
114
|
+
third_party_names: Set of third-party package names
|
|
115
|
+
file_dir: Directory to check for local files/packages
|
|
116
|
+
"""
|
|
117
|
+
if mod_name in stdlib_names:
|
|
118
|
+
return "stdlib"
|
|
119
|
+
if mod_name.lower() in third_party_names:
|
|
120
|
+
return "third_party"
|
|
121
|
+
# Check if a local .py file or package exists under file_dir
|
|
122
|
+
if (file_dir / f"{mod_name}.py").exists():
|
|
123
|
+
return "local"
|
|
124
|
+
if (file_dir / mod_name / "__init__.py").exists():
|
|
125
|
+
return "local"
|
|
126
|
+
# Fallback: use importlib to search sys.path for modules in sibling directories
|
|
127
|
+
if _is_local_via_importlib(mod_name):
|
|
128
|
+
return "local"
|
|
129
|
+
# Assume third-party if we can't determine
|
|
130
|
+
return "third_party"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _is_local_via_importlib(mod_name: str) -> bool:
    """Check whether *mod_name* resolves to a local (non-installed) module.

    Returns ``True`` when ``importlib.util.find_spec`` finds the module, the
    module is backed by a real filesystem location, and that location does
    **not** contain ``site-packages`` or ``dist-packages``.

    Built-in and frozen modules report the pseudo-origins ``"built-in"`` /
    ``"frozen"`` instead of a path. They have no source to bundle, so they are
    never considered local.

    Note: ``find_spec`` may execute parent package ``__init__.py`` files as a
    side effect when resolving dotted names. We catch all exceptions broadly
    so that package-init failures (``RuntimeError``, ``KeyError``, etc.) do not
    break the static generation step.

    Args:
        mod_name: Module name to look up on ``sys.path``.

    Returns:
        ``True`` if the module looks like local project code, else ``False``
        (including when the lookup fails for any reason).
    """
    try:
        spec = importlib.util.find_spec(mod_name)
        if spec is None:
            return False
        origin = spec.origin
        # Pseudo-origins are not paths; without this guard they would pass the
        # marker check below and be misreported as local.
        if origin in ("built-in", "frozen"):
            return False
        # Namespace packages have no origin — check submodule_search_locations.
        search_locations = spec.submodule_search_locations
        path_to_check = origin or (str(search_locations[0]) if search_locations else None)
        if not path_to_check:
            return False
        return not any(marker in path_to_check for marker in _INSTALLED_PACKAGE_MARKERS)
    except Exception:
        return False
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# =============================================================================
|
|
160
|
+
# ModuleBundler
|
|
161
|
+
# =============================================================================
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ModuleBundler:
|
|
165
|
+
"""Discovers local Python modules, bundles their source, and generates injection code.
|
|
166
|
+
|
|
167
|
+
Usage::
|
|
168
|
+
|
|
169
|
+
module_sources = ModuleBundler.collect_sources(dag_file, resolve_root=dags_root)
|
|
170
|
+
b64 = ModuleBundler.encode(module_sources)
|
|
171
|
+
snippet = ModuleBundler.build_injection(b64)
|
|
172
|
+
"""
|
|
173
|
+
|
|
174
|
+
@staticmethod
|
|
175
|
+
def classify_imports(
|
|
176
|
+
file_path: Path,
|
|
177
|
+
pip_deps: list[str] | None = None,
|
|
178
|
+
resolve_root: Path | None = None,
|
|
179
|
+
) -> dict[str, Literal["stdlib", "third_party", "local"]]:
|
|
180
|
+
"""Classify imports in a Python file as stdlib, third-party, or local.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
file_path: Path to the Python source file
|
|
184
|
+
pip_deps: List of pip dependency strings (e.g., ["pandas==2.0", "requests>=2.28"])
|
|
185
|
+
resolve_root: Directory to check for local modules. Defaults to file_path.parent.
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
Dict mapping module names to their classification.
|
|
189
|
+
"""
|
|
190
|
+
return classify_imports(file_path, pip_deps, resolve_root)
|
|
191
|
+
|
|
192
|
+
@staticmethod
|
|
193
|
+
def collect_sources(
|
|
194
|
+
file_path: Path,
|
|
195
|
+
resolve_root: Path | None = None,
|
|
196
|
+
pip_deps: list[str] | None = None,
|
|
197
|
+
source: str | None = None,
|
|
198
|
+
) -> dict[str, str]:
|
|
199
|
+
"""Collect source text of local dependency modules from disk.
|
|
200
|
+
|
|
201
|
+
Resolves local imports via AST analysis and filesystem lookup, without
|
|
202
|
+
requiring modules to be loaded in ``sys.modules``.
|
|
203
|
+
|
|
204
|
+
For each local import found by ``classify_imports``, the function resolves the
|
|
205
|
+
full dotted module path to a ``.py`` file (or ``__init__.py`` package) under
|
|
206
|
+
*resolve_root* and reads its source text. Transitive local imports within
|
|
207
|
+
each discovered module are also collected recursively.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
file_path: Python source file whose imports to analyse.
|
|
211
|
+
resolve_root: Root directory for local module resolution. Defaults to
|
|
212
|
+
``file_path.parent``.
|
|
213
|
+
pip_deps: Pip dependency strings passed through to ``classify_imports``.
|
|
214
|
+
source: Source text to analyse instead of reading *file_path*. When
|
|
215
|
+
provided, only imports present in this text are considered. This
|
|
216
|
+
is useful for scoping the bundle to a specific callable rather
|
|
217
|
+
than the entire file.
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
``{dotted_module_name: source_text}`` for every discovered local module.
|
|
221
|
+
"""
|
|
222
|
+
root = resolve_root or file_path.parent
|
|
223
|
+
if source is None:
|
|
224
|
+
source = file_path.read_text()
|
|
225
|
+
classifications = classify_imports(file_path, pip_deps, resolve_root=root, source=source)
|
|
226
|
+
|
|
227
|
+
local_top_names = {name for name, cls in classifications.items() if cls == "local"}
|
|
228
|
+
if not local_top_names:
|
|
229
|
+
return {}
|
|
230
|
+
|
|
231
|
+
# Walk the AST to collect full dotted module paths (classify_imports only
|
|
232
|
+
# records top-level names, e.g. "local_modules" from "from local_modules.dw import X").
|
|
233
|
+
full_module_paths = _collect_full_module_paths(source, local_top_names)
|
|
234
|
+
|
|
235
|
+
# Resolve each module path to a source file and read it
|
|
236
|
+
result: dict[str, str] = {}
|
|
237
|
+
visited: set[str] = set()
|
|
238
|
+
_resolve_modules_recursive(full_module_paths, root, result, visited, pip_deps)
|
|
239
|
+
return result
|
|
240
|
+
|
|
241
|
+
@staticmethod
|
|
242
|
+
def encode(module_sources: dict[str, str]) -> str | None:
|
|
243
|
+
"""Compress and base64-encode a dict of module sources for embedding.
|
|
244
|
+
|
|
245
|
+
Modules are sorted so that dependencies execute before dependents.
|
|
246
|
+
We perform a topological sort over the module-level import graph
|
|
247
|
+
between bundled modules, with parent packages preceding their
|
|
248
|
+
submodules. This ensures references made *at module load time*
|
|
249
|
+
(e.g. ``FOO = bbb.bar()`` at the top of ``aaa.py``) find their
|
|
250
|
+
target already executed — sorting purely by depth + name fails
|
|
251
|
+
whenever a dependent sorts before its dependency (issue #30197).
|
|
252
|
+
|
|
253
|
+
If the dependency graph contains a cycle (which would also fail
|
|
254
|
+
under a normal Python import for any module-level reference), we
|
|
255
|
+
fall back to ``(depth, alphabetical)`` order so output stays
|
|
256
|
+
deterministic.
|
|
257
|
+
|
|
258
|
+
Args:
|
|
259
|
+
module_sources: ``{module_name: source_text}`` dict.
|
|
260
|
+
|
|
261
|
+
Returns:
|
|
262
|
+
Base64-encoded string, or ``None`` if *module_sources* is empty.
|
|
263
|
+
"""
|
|
264
|
+
if not module_sources:
|
|
265
|
+
return None
|
|
266
|
+
import zlib
|
|
267
|
+
ordered_names = _topological_order(module_sources)
|
|
268
|
+
ordered = {name: module_sources[name] for name in ordered_names}
|
|
269
|
+
sources_json = json.dumps(ordered)
|
|
270
|
+
compressed = zlib.compress(sources_json.encode(), level=9)
|
|
271
|
+
return base64.b64encode(compressed).decode("ascii")
|
|
272
|
+
|
|
273
|
+
@staticmethod
|
|
274
|
+
def build_injection(bundled_modules_b64: str) -> str:
|
|
275
|
+
"""Return a Python snippet that decodes and injects bundled modules into ``sys.modules``.
|
|
276
|
+
|
|
277
|
+
The snippet is self-contained: it imports ``sys``, ``types``, ``base64``,
|
|
278
|
+
``json``, and ``zlib``, then decompresses the embedded blob and registers
|
|
279
|
+
each module via ``types.ModuleType`` + ``exec``.
|
|
280
|
+
|
|
281
|
+
Args:
|
|
282
|
+
bundled_modules_b64: Base64 string produced by ``encode``.
|
|
283
|
+
"""
|
|
284
|
+
return textwrap.dedent(f"""\
|
|
285
|
+
# --- Inject local dependency modules from embedded source ---
|
|
286
|
+
import sys
|
|
287
|
+
import types
|
|
288
|
+
import base64
|
|
289
|
+
import json
|
|
290
|
+
import zlib
|
|
291
|
+
|
|
292
|
+
_EMBEDDED_MODULES = json.loads(zlib.decompress(base64.b64decode({repr(bundled_modules_b64)})))
|
|
293
|
+
# Pass 1: register all modules in sys.modules (without executing source)
|
|
294
|
+
# so transitive imports between bundled modules can resolve in any order.
|
|
295
|
+
_module_objs = {{}}
|
|
296
|
+
_package_names = set()
|
|
297
|
+
for _mod_name in _EMBEDDED_MODULES:
|
|
298
|
+
_parts = _mod_name.split('.')
|
|
299
|
+
for _i in range(1, len(_parts)):
|
|
300
|
+
_package_names.add('.'.join(_parts[:_i]))
|
|
301
|
+
for _mod_name in _EMBEDDED_MODULES:
|
|
302
|
+
_parts = _mod_name.split('.')
|
|
303
|
+
for _i in range(1, len(_parts)):
|
|
304
|
+
_parent = '.'.join(_parts[:_i])
|
|
305
|
+
if _parent not in sys.modules:
|
|
306
|
+
_pkg = types.ModuleType(_parent)
|
|
307
|
+
_pkg.__path__ = []
|
|
308
|
+
_pkg.__package__ = _parent
|
|
309
|
+
sys.modules[_parent] = _pkg
|
|
310
|
+
_mod = sys.modules.get(_mod_name)
|
|
311
|
+
if _mod is None or _mod_name not in _package_names:
|
|
312
|
+
_mod = types.ModuleType(_mod_name)
|
|
313
|
+
sys.modules[_mod_name] = _mod
|
|
314
|
+
_is_package = _mod_name in _package_names
|
|
315
|
+
_mod.__package__ = _mod_name if _is_package else ('.'.join(_parts[:-1]) if len(_parts) > 1 else '')
|
|
316
|
+
if _is_package:
|
|
317
|
+
_mod.__path__ = []
|
|
318
|
+
if len(_parts) > 1:
|
|
319
|
+
setattr(sys.modules['.'.join(_parts[:-1])], _parts[-1], _mod)
|
|
320
|
+
_module_objs[_mod_name] = _mod
|
|
321
|
+
# Pass 2: execute source in all registered modules
|
|
322
|
+
for _mod_name, _mod_source in _EMBEDDED_MODULES.items():
|
|
323
|
+
_code = compile(_mod_source, _mod_name.replace('.', '/') + '.py', 'exec')
|
|
324
|
+
exec(_code, _module_objs[_mod_name].__dict__)""")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# =============================================================================
|
|
328
|
+
# Private helpers
|
|
329
|
+
# =============================================================================
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _topological_order(module_sources: dict[str, str]) -> list[str]:
    """Return bundled module names sorted so dependencies precede dependents.

    Builds a graph of module-level imports between bundled modules and
    runs ``graphlib.TopologicalSorter``. Falls back to ``(depth,
    alphabetical)`` ordering when the graph contains a cycle so output
    remains deterministic.

    Args:
        module_sources: ``{module_name: source_text}`` for every bundled module.

    Returns:
        All keys of *module_sources*, each exactly once, in execution order.
    """
    from graphlib import CycleError, TopologicalSorter

    bundled = set(module_sources)
    fallback_order = sorted(bundled, key=lambda n: (n.count("."), n))

    sorter = TopologicalSorter()
    # Register every node up front, in the deterministic fallback order. The
    # sorter breaks ties by registration order, and registering a node for the
    # first time as somebody's dependency would make that order depend on set
    # iteration order (which varies with string hash randomization).
    for name in fallback_order:
        sorter.add(name)
    for name in fallback_order:
        deps = _module_level_dependencies(name, module_sources[name], bundled)
        # Sorted so edges are also recorded in a stable order.
        sorter.add(name, *sorted(deps))

    try:
        # static_order() is lazy; list() forces cycle detection inside the try.
        return list(sorter.static_order())
    except CycleError:
        return fallback_order
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _module_level_dependencies(name: str, source: str, bundled: set[str]) -> set[str]:
    """Return bundled modules that *name* depends on at module load time.

    Considers only imports that execute when the module is first run
    (i.e. excludes imports nested inside function or lambda bodies).

    Note we deliberately do *not* add a blanket "parent package before
    child module" edge. Pass 1 of the runtime injection registers every
    bundled module in ``sys.modules`` up front, so a child can resolve
    ``import <parent>`` regardless of execution order. A child only
    needs its parent exec'd first if it references the parent's
    attributes at module load — and that case shows up as an explicit
    ``from <parent> import ...`` / ``import <parent>`` in the child's
    source, which is captured below. Adding a blanket parent-before-
    child edge would also create a spurious cycle whenever the parent's
    ``__init__.py`` does ``from . import sibling`` (a common pattern),
    forcing the topological sort to fall back to the legacy alphabetical
    order — the very behavior this function exists to replace.

    Args:
        name: Dotted name of the bundled module being analysed.
        source: Source text of that module.
        bundled: Names of all bundled modules (including *name*).

    Returns:
        Subset of *bundled* (never containing *name*) imported at load time.
        Empty if *source* does not parse.
    """
    deps: set[str] = set()

    try:
        tree = ast.parse(source)
    except SyntaxError:
        return deps

    # Package context for resolving relative imports. Top-level modules use
    # themselves; a dotted name that has bundled children is a package
    # ``__init__`` and is therefore its own context too (``from .x import y``
    # inside ``pkg/sub/__init__.py`` means ``pkg.sub.x``, not ``pkg.x``).
    # Any other dotted name is a plain submodule and uses its immediate parent.
    parts = name.split(".")
    child_prefix = f"{name}."
    has_bundled_children = any(other.startswith(child_prefix) for other in bundled)
    if len(parts) == 1 or has_bundled_children:
        pkg_context = name
    else:
        pkg_context = ".".join(parts[:-1])

    for node in _iter_module_level_nodes(tree):
        if not isinstance(node, (ast.Import, ast.ImportFrom)):
            continue
        for target in _import_node_targets(node, pkg_context):
            # Match the longest dotted prefix that is bundled — handles
            # ``from pkg.sub import mod`` where ``pkg.sub.mod`` is the
            # bundled submodule.
            tparts = target.split(".")
            for j in range(len(tparts), 0, -1):
                candidate = ".".join(tparts[:j])
                if candidate in bundled and candidate != name:
                    deps.add(candidate)
                    break
    return deps
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _iter_module_level_nodes(tree: ast.AST) -> Iterator[ast.AST]:
    """Walk *tree* in pre-order, pruning everything that runs only when called.

    ``def``, ``async def`` and ``lambda`` nodes are neither yielded nor
    descended into: code inside them does not run while the module is being
    loaded, so imports found there impose no ordering between bundled
    modules. Everything else — class bodies and module-scope
    ``if``/``try``/``with`` blocks included — runs at load time and is
    yielded, parents before children.
    """
    deferred_scopes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)
    pending = [tree]
    while pending:
        current = pending.pop()
        if isinstance(current, deferred_scopes):
            continue
        yield current
        # Push children reversed so the stack pops them in source order.
        pending.extend(reversed(list(ast.iter_child_nodes(current))))
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _import_node_targets(
|
|
423
|
+
node: ast.Import | ast.ImportFrom, pkg_context: str,
|
|
424
|
+
) -> list[str]:
|
|
425
|
+
"""Return the dotted module paths an import node refers to.
|
|
426
|
+
|
|
427
|
+
For ``from pkg import a, b`` we return ``pkg``, ``pkg.a``, and
|
|
428
|
+
``pkg.b`` — names that turn out to be attributes (not submodules)
|
|
429
|
+
are filtered out by the caller via the ``bundled`` membership check.
|
|
430
|
+
Relative imports are resolved against *pkg_context* using the same
|
|
431
|
+
convention as ``_collect_full_module_paths``.
|
|
432
|
+
"""
|
|
433
|
+
targets: list[str] = []
|
|
434
|
+
if isinstance(node, ast.Import):
|
|
435
|
+
for alias in node.names:
|
|
436
|
+
targets.append(alias.name)
|
|
437
|
+
elif isinstance(node, ast.ImportFrom):
|
|
438
|
+
if node.module and node.level == 0:
|
|
439
|
+
targets.append(node.module)
|
|
440
|
+
for alias in node.names:
|
|
441
|
+
targets.append(f"{node.module}.{alias.name}")
|
|
442
|
+
elif node.level > 0:
|
|
443
|
+
if pkg_context:
|
|
444
|
+
ctx_parts = pkg_context.split(".")
|
|
445
|
+
base = ".".join(ctx_parts[: max(0, len(ctx_parts) - (node.level - 1))])
|
|
446
|
+
if node.module:
|
|
447
|
+
resolved = f"{base}.{node.module}" if base else node.module
|
|
448
|
+
targets.append(resolved)
|
|
449
|
+
for alias in node.names:
|
|
450
|
+
targets.append(f"{resolved}.{alias.name}")
|
|
451
|
+
else:
|
|
452
|
+
for alias in node.names:
|
|
453
|
+
targets.append(f"{base}.{alias.name}" if base else alias.name)
|
|
454
|
+
elif node.module:
|
|
455
|
+
targets.append(node.module)
|
|
456
|
+
return targets
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _collect_full_module_paths(
    source: str, local_top_names: set[str], package_context: str = "",
) -> set[str]:
    """Extract full dotted module paths for imports whose top-level name is local.

    For ``from X import a, b`` both ``X`` and the candidates ``X.a`` / ``X.b``
    are recorded: an imported name may itself be a submodule. Candidates that
    are really functions or classes are expected to fail resolution later and
    be ignored.

    Args:
        source: Python source code to scan.
        local_top_names: Set of top-level module names classified as local.
        package_context: Dotted package name of the module being scanned.
            Used to resolve relative imports (e.g., ``from .defaults import X``
            inside ``local_helpers.config`` becomes ``local_helpers.defaults``).
            When empty, relative imports are recorded verbatim if their
            top-level name is in *local_top_names*.

    Returns:
        Set of dotted module paths (candidates) to resolve.

    Raises:
        SyntaxError: If *source* cannot be parsed.
    """
    tree = ast.parse(source)
    paths: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                if alias.name.split(".")[0] in local_top_names:
                    paths.add(alias.name)
            continue
        if not isinstance(node, ast.ImportFrom):
            continue

        is_relative = node.level > 0
        if is_relative and package_context:
            # Relative import — resolve to an absolute path using the package
            # context. Relative imports are always local by definition, so
            # there is no need to check against local_top_names.
            # One dot keeps the whole context; each extra dot drops a level.
            parts = package_context.split(".")
            base = ".".join(parts[: max(0, len(parts) - (node.level - 1))])
            if node.module:
                resolved = f"{base}.{node.module}" if base else node.module
                paths.add(resolved)
                # Also add child paths for imported names (submodule case).
                paths.update(f"{resolved}.{alias.name}" for alias in node.names)
            else:
                # `from . import X` — import names are the modules.
                paths.update(
                    (f"{base}.{alias.name}" if base else alias.name) for alias in node.names
                )
        elif node.module:
            # Absolute import, or relative import with no package context
            # (recorded verbatim).
            if node.module.split(".")[0] in local_top_names:
                paths.add(node.module)
                paths.update(f"{node.module}.{alias.name}" for alias in node.names)
        elif is_relative:
            # `from . import X` with no package context: the imported names
            # are the modules. classify_imports marks them as local, so they
            # must be collected too or they would silently be left out of the
            # bundle. ``*`` names no module and is skipped.
            for alias in node.names:
                if alias.name != "*" and alias.name in local_top_names:
                    paths.add(alias.name)
    return paths
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def _resolve_module_file(dotted_name: str, root: Path) -> Path | None:
|
|
520
|
+
"""Resolve a dotted module name to a source file under *root*.
|
|
521
|
+
|
|
522
|
+
Checks (in order):
|
|
523
|
+
1. ``root/a/b/c.py`` (module)
|
|
524
|
+
2. ``root/a/b/c/__init__.py`` (package)
|
|
525
|
+
3. ``importlib.util.find_spec`` fallback — resolves modules on ``sys.path``
|
|
526
|
+
that live outside *root* (e.g. sibling directories). Only non-installed
|
|
527
|
+
(non-``site-packages``) modules are accepted, and when *root* is an
|
|
528
|
+
explicit ``resolve_root`` the resolved path must share a common project
|
|
529
|
+
ancestor with *root* to prevent bundling code from unrelated projects.
|
|
530
|
+
"""
|
|
531
|
+
parts = dotted_name.replace(".", "/")
|
|
532
|
+
candidate = root / (parts + ".py")
|
|
533
|
+
if candidate.exists():
|
|
534
|
+
return candidate
|
|
535
|
+
candidate = root / parts / "__init__.py"
|
|
536
|
+
if candidate.exists():
|
|
537
|
+
return candidate
|
|
538
|
+
# Fallback: use importlib to find modules in sibling directories
|
|
539
|
+
return _resolve_module_file_via_importlib(dotted_name, root)
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _resolve_module_file_via_importlib(dotted_name: str, root: Path) -> Path | None:
|
|
543
|
+
"""Resolve a dotted module name to a Python **source** file via ``importlib``.
|
|
544
|
+
|
|
545
|
+
Returns the file path only when the module is *not* installed in
|
|
546
|
+
``site-packages`` / ``dist-packages`` (i.e. it is a local project module),
|
|
547
|
+
the origin is a ``.py`` file, and the resolved path shares a common
|
|
548
|
+
ancestor with *root* (i.e. lives in the same project tree). Extension
|
|
549
|
+
modules (``.so``, ``.pyd``) are excluded because the bundler reads source
|
|
550
|
+
text via ``read_text()``.
|
|
551
|
+
|
|
552
|
+
Note: ``find_spec`` may execute parent package ``__init__.py`` files as a
|
|
553
|
+
side effect when resolving dotted names. We catch all exceptions broadly
|
|
554
|
+
so that package-init failures do not break the static generation step.
|
|
555
|
+
"""
|
|
556
|
+
try:
|
|
557
|
+
spec = importlib.util.find_spec(dotted_name)
|
|
558
|
+
if spec is None:
|
|
559
|
+
return None
|
|
560
|
+
origin = spec.origin
|
|
561
|
+
if not origin or origin == "frozen":
|
|
562
|
+
return None
|
|
563
|
+
# Only accept Python source files — extension modules (.so, .pyd)
|
|
564
|
+
# cannot be read as text and must not be bundled.
|
|
565
|
+
if not origin.endswith(".py"):
|
|
566
|
+
return None
|
|
567
|
+
origin_path = Path(origin).resolve()
|
|
568
|
+
if not origin_path.exists():
|
|
569
|
+
return None
|
|
570
|
+
origin_str = str(origin_path)
|
|
571
|
+
if any(marker in origin_str for marker in _INSTALLED_PACKAGE_MARKERS):
|
|
572
|
+
return None
|
|
573
|
+
# Guard: the resolved file must live under the same project tree as
|
|
574
|
+
# root. We check that root and origin share a meaningful common
|
|
575
|
+
# ancestor (more specific than just "/" or a drive letter) to prevent
|
|
576
|
+
# silently bundling code from unrelated projects on sys.path.
|
|
577
|
+
resolved_root = root.resolve()
|
|
578
|
+
try:
|
|
579
|
+
# If origin is under root, great — always accept.
|
|
580
|
+
origin_path.relative_to(resolved_root)
|
|
581
|
+
except ValueError:
|
|
582
|
+
# Origin is outside root. Accept only if they share a common
|
|
583
|
+
# ancestor that is at least 2 levels deep (e.g. /Users/me/project,
|
|
584
|
+
# not just / or /Users).
|
|
585
|
+
common = Path(os.path.commonpath([resolved_root, origin_path]))
|
|
586
|
+
if len(common.parts) <= 2:
|
|
587
|
+
return None
|
|
588
|
+
return origin_path
|
|
589
|
+
except Exception:
|
|
590
|
+
return None
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def _resolve_modules_recursive(
|
|
594
|
+
module_paths: set[str],
|
|
595
|
+
root: Path,
|
|
596
|
+
result: dict[str, str],
|
|
597
|
+
visited: set[str],
|
|
598
|
+
pip_deps: list[str] | None,
|
|
599
|
+
) -> None:
|
|
600
|
+
"""Resolve module paths to source text, following transitive local imports."""
|
|
601
|
+
for dotted in sorted(module_paths):
|
|
602
|
+
if dotted in visited:
|
|
603
|
+
continue
|
|
604
|
+
visited.add(dotted)
|
|
605
|
+
|
|
606
|
+
mod_file = _resolve_module_file(dotted, root)
|
|
607
|
+
if not mod_file:
|
|
608
|
+
# Also try the top-level name (package __init__)
|
|
609
|
+
top = dotted.split(".")[0]
|
|
610
|
+
if top not in visited:
|
|
611
|
+
visited.add(top)
|
|
612
|
+
pkg_init = _resolve_module_file(top, root)
|
|
613
|
+
if pkg_init:
|
|
614
|
+
result[top] = pkg_init.read_text()
|
|
615
|
+
continue
|
|
616
|
+
|
|
617
|
+
mod_source = mod_file.read_text()
|
|
618
|
+
result[dotted] = mod_source
|
|
619
|
+
|
|
620
|
+
# Ensure all parent packages are collected (e.g. for "a.b.c",
|
|
621
|
+
# collect "a" and "a.b" __init__.py files). Python always
|
|
622
|
+
# populates parent packages during import resolution, so the
|
|
623
|
+
# bundle must include them for runtime correctness.
|
|
624
|
+
# We also follow transitive imports in each parent __init__.py,
|
|
625
|
+
# since Python executes them at import time and they may pull in
|
|
626
|
+
# sibling modules (e.g. ``from . import helpers``).
|
|
627
|
+
parts = dotted.split(".")
|
|
628
|
+
for i in range(1, len(parts)):
|
|
629
|
+
parent = ".".join(parts[:i])
|
|
630
|
+
if parent not in visited:
|
|
631
|
+
visited.add(parent)
|
|
632
|
+
parent_file = _resolve_module_file(parent, root)
|
|
633
|
+
if parent_file:
|
|
634
|
+
parent_source = parent_file.read_text()
|
|
635
|
+
result[parent] = parent_source
|
|
636
|
+
# Follow transitive local imports within the parent __init__.py
|
|
637
|
+
try:
|
|
638
|
+
parent_classifications = classify_imports(
|
|
639
|
+
parent_file, pip_deps, resolve_root=root, source=parent_source,
|
|
640
|
+
)
|
|
641
|
+
parent_local = {name for name, cls in parent_classifications.items() if cls == "local"}
|
|
642
|
+
if parent_local:
|
|
643
|
+
parent_paths = _collect_full_module_paths(
|
|
644
|
+
parent_source, parent_local, package_context=parent,
|
|
645
|
+
)
|
|
646
|
+
_resolve_modules_recursive(parent_paths, root, result, visited, pip_deps)
|
|
647
|
+
except Exception:
|
|
648
|
+
pass # Best-effort transitive resolution
|
|
649
|
+
|
|
650
|
+
# Follow transitive local imports within this module.
|
|
651
|
+
# Derive the package context so relative imports resolve correctly:
|
|
652
|
+
# e.g., module "local_helpers.config" has package context "local_helpers"
|
|
653
|
+
parts = dotted.split(".")
|
|
654
|
+
pkg_context = ".".join(parts[:-1]) if len(parts) > 1 else dotted
|
|
655
|
+
try:
|
|
656
|
+
child_classifications = classify_imports(mod_file, pip_deps, resolve_root=root, source=mod_source)
|
|
657
|
+
child_local = {name for name, cls in child_classifications.items() if cls == "local"}
|
|
658
|
+
if child_local:
|
|
659
|
+
child_paths = _collect_full_module_paths(mod_source, child_local, package_context=pkg_context)
|
|
660
|
+
_resolve_modules_recursive(child_paths, root, result, visited, pip_deps)
|
|
661
|
+
except Exception:
|
|
662
|
+
pass # Best-effort transitive resolution
|
|
File without changes
|