tangle-cli 0.0.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tangle_cli/__init__.py +19 -0
- tangle_cli/api_cli.py +787 -0
- tangle_cli/api_schema.py +633 -0
- tangle_cli/api_transport.py +461 -0
- tangle_cli/args_container.py +244 -0
- tangle_cli/artifacts.py +293 -0
- tangle_cli/artifacts_cli.py +108 -0
- tangle_cli/cli.py +57 -0
- tangle_cli/cli_helpers.py +116 -0
- tangle_cli/cli_options.py +52 -0
- tangle_cli/client.py +677 -0
- tangle_cli/component_from_func.py +1856 -0
- tangle_cli/component_generator.py +298 -0
- tangle_cli/component_inspector.py +494 -0
- tangle_cli/component_publisher.py +921 -0
- tangle_cli/components_cli.py +269 -0
- tangle_cli/dynamic_discovery_client.py +296 -0
- tangle_cli/generated_model_extensions.py +405 -0
- tangle_cli/generated_runtime.py +43 -0
- tangle_cli/handler.py +96 -0
- tangle_cli/hydration_trust.py +222 -0
- tangle_cli/logger.py +166 -0
- tangle_cli/models.py +407 -0
- tangle_cli/module_bundler.py +662 -0
- tangle_cli/openapi/__init__.py +0 -0
- tangle_cli/openapi/codegen.py +1090 -0
- tangle_cli/openapi/parser.py +77 -0
- tangle_cli/pipeline_dehydrator.py +720 -0
- tangle_cli/pipeline_hydrator.py +1785 -0
- tangle_cli/pipeline_run_annotations.py +41 -0
- tangle_cli/pipeline_run_details.py +203 -0
- tangle_cli/pipeline_run_manager.py +1994 -0
- tangle_cli/pipeline_run_search.py +712 -0
- tangle_cli/pipeline_runner.py +620 -0
- tangle_cli/pipeline_runs_cli.py +584 -0
- tangle_cli/pipelines.py +581 -0
- tangle_cli/pipelines_cli.py +271 -0
- tangle_cli/published_components_cli.py +373 -0
- tangle_cli/py.typed +0 -0
- tangle_cli/quickstart.py +110 -0
- tangle_cli/secrets.py +156 -0
- tangle_cli/secrets_cli.py +269 -0
- tangle_cli/utils.py +942 -0
- tangle_cli/version_manager.py +470 -0
- tangle_cli-0.0.1a1.dist-info/METADATA +561 -0
- tangle_cli-0.0.1a1.dist-info/RECORD +48 -0
- tangle_cli-0.0.1a1.dist-info/WHEEL +4 -0
- tangle_cli-0.0.1a1.dist-info/entry_points.txt +3 -0
tangle_cli/utils.py
ADDED
|
@@ -0,0 +1,942 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generic utility functions for tangle-cli.
|
|
3
|
+
|
|
4
|
+
YAML parsing/dumping, version comparison, digest computation, git metadata
|
|
5
|
+
extraction, and pipeline-spec traversal.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
from collections import OrderedDict
|
|
13
|
+
from collections.abc import Callable, Mapping
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import yaml
|
|
18
|
+
|
|
19
|
+
from tangle_cli.logger import Logger, get_default_logger
|
|
20
|
+
|
|
21
|
+
# =============================================================================
|
|
22
|
+
# Generic Data Helpers
|
|
23
|
+
# =============================================================================
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _strip_text_from_graph(implementation: dict[str, Any]) -> None:
    """Drop the raw ``text`` entry from every componentRef in a graph, in place.

    Walks ``implementation["graph"]["tasks"]`` and, for each task that carries
    a non-empty ``componentRef``, removes its ``text`` key. When the referenced
    spec has its own ``implementation`` containing a ``graph``, the same
    stripping is applied to that nested implementation.

    Args:
        implementation: Implementation dict to clean; mutated in place.
    """
    tasks = implementation.get("graph", {}).get("tasks", {})
    for task in tasks.values():
        component_ref = task.get("componentRef")
        if component_ref:
            component_ref.pop("text", None)
            inner = component_ref.get("spec", {}).get("implementation")
            # Only descend when the referenced component is itself a graph.
            if inner and "graph" in inner:
                _strip_text_from_graph(inner)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def add_official_prefix(name: str | None) -> str | None:
|
|
42
|
+
"""Return the official component name variant used by registry searches."""
|
|
43
|
+
|
|
44
|
+
if name and not name.startswith("[Official]"):
|
|
45
|
+
return f"[Official] {name}"
|
|
46
|
+
return name
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _value_from_mapping_or_object(value: object, key: str, default: Any = None) -> Any:
    """Look up *key* on a mapping-like or attribute-bearing object.

    Strategies are tried in this order, and the first applicable one wins:

    1. ``value`` is a ``Mapping``: ``value.get(key, default)``.
    2. ``value`` exposes a callable ``get``: ``value.get(key, default)``.
    3. ``value`` exposes a callable ``to_dict`` whose result is a ``Mapping``:
       the lookup is done on that result.
    4. Otherwise: ``getattr(value, key, default)``.
    """
    if isinstance(value, Mapping):
        return value.get(key, default)

    accessor = getattr(value, "get", None)
    if callable(accessor):
        return accessor(key, default)

    exporter = getattr(value, "to_dict", None)
    exported = exporter() if callable(exporter) else None
    if isinstance(exported, Mapping):
        return exported.get(key, default)

    # Last resort: plain attribute access.
    return getattr(value, key, default)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _optional_str(value: Any) -> str | None:
|
|
69
|
+
"""Return *value* only when it is already a string."""
|
|
70
|
+
|
|
71
|
+
return value if isinstance(value, str) else None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# =============================================================================
|
|
75
|
+
# Numeric Helpers
|
|
76
|
+
# =============================================================================
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def clamp(value: float, lower: float, upper: float) -> float:
    """Bound *value* to the inclusive ``[lower, upper]`` range.

    The lower bound is applied first and the upper bound second, so when
    ``lower > upper`` the result is ``upper``.
    """
    floored = lower if lower > value else value
    return upper if upper < floored else floored
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# =============================================================================
|
|
85
|
+
# Environment Helpers
|
|
86
|
+
# =============================================================================
|
|
87
|
+
|
|
88
|
+
# Values accepted as truthy for boolean-style env vars across Tangle tooling.
_TRUTHY_ENV_VALUES = ("1", "true", "yes")


def tangle_verbose_enabled() -> bool:
    """Report whether the ``TANGLE_VERBOSE`` environment variable is truthy.

    The variable's value is lower-cased and compared against ``"1"``,
    ``"true"`` and ``"yes"``. Surrounding whitespace is not stripped, and an
    unset variable counts as disabled.
    """
    raw_value = os.environ.get("TANGLE_VERBOSE", "")
    return raw_value.lower() in _TRUTHY_ENV_VALUES
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# =============================================================================
|
|
103
|
+
# Component-Path Conventions
|
|
104
|
+
# =============================================================================
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def find_documentation_path_for_yaml(yaml_path: Path) -> str | None:
|
|
108
|
+
"""Return ``docs/<stem>.md`` next to a component YAML, if it exists.
|
|
109
|
+
|
|
110
|
+
Encodes the convention that a component YAML at ``foo/bar.yaml`` carries
|
|
111
|
+
its human-readable docs at ``foo/docs/bar.md``. Returns the absolute
|
|
112
|
+
path as a string, or ``None`` when no such file exists.
|
|
113
|
+
"""
|
|
114
|
+
docs_path = yaml_path.parent / "docs" / f"{yaml_path.stem}.md"
|
|
115
|
+
return str(docs_path.resolve()) if docs_path.exists() else None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# =============================================================================
|
|
119
|
+
# String / Template Helpers
|
|
120
|
+
# =============================================================================
|
|
121
|
+
|
|
122
|
+
# Recognizes ``${name}`` or ``${name:-default}`` placeholders. The syntax
|
|
123
|
+
# is borrowed from POSIX parameter expansion for familiarity, but these
|
|
124
|
+
# placeholders have nothing to do with shells, processes, or environments
|
|
125
|
+
# — they're filled from an explicit ``vars`` dict, never from
|
|
126
|
+
# ``os.environ``. ``name`` follows Python identifier rules (letter or
|
|
127
|
+
# underscore start, then alphanumerics / underscores). ``default`` is
|
|
128
|
+
# everything up to the closing ``}`` and may be empty (``${name:-}``).
|
|
129
|
+
#
|
|
130
|
+
# Convention: prefer lowercase / snake_case ``name``s. Uppercase reads as
|
|
131
|
+
# an env-var reference and risks misleading readers about what's actually
|
|
132
|
+
# providing the values.
|
|
133
|
+
_VAR_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-([^}]*))?\}")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class UnsetVarError(KeyError):
|
|
137
|
+
"""Raised when a strict ``${name}`` placeholder has no value and no default.
|
|
138
|
+
|
|
139
|
+
A ``KeyError`` subclass so existing ``except KeyError`` handlers keep
|
|
140
|
+
working; the dedicated type lets callers distinguish unresolved
|
|
141
|
+
placeholders from incidental ``KeyError``s if they want a clearer
|
|
142
|
+
error message.
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def expand_vars(text: str, vars: dict[str, str]) -> str:
|
|
147
|
+
"""Expand ``${name}`` / ``${name:-default}`` placeholders in ``text``.
|
|
148
|
+
|
|
149
|
+
Mirrors ``os.path.expandvars`` in syntax, but reads from an explicit
|
|
150
|
+
``vars`` dict instead of ``os.environ`` — these are *not* environment
|
|
151
|
+
variables, despite the syntax similarity. Lowercase / snake_case
|
|
152
|
+
names are conventional here (uppercase would mislead readers who treat
|
|
153
|
+
the same syntax as env-var interpolation in shells/Docker/etc.).
|
|
154
|
+
Recognized forms:
|
|
155
|
+
|
|
156
|
+
* ``${name}`` — strict; raises :class:`UnsetVarError` (a ``KeyError``
|
|
157
|
+
subclass) if ``name`` is missing from ``vars``.
|
|
158
|
+
* ``${name:-default}`` — falls back to the literal ``default`` text when
|
|
159
|
+
``name`` is missing. ``${name:-}`` substitutes the empty string.
|
|
160
|
+
|
|
161
|
+
Substitution is purely textual; values are inserted verbatim. Callers
|
|
162
|
+
that interpolate into structured formats (YAML, JSON, shell commands,
|
|
163
|
+
…) should quote the placeholder appropriately so unusual values can't
|
|
164
|
+
break the surrounding syntax — e.g. for YAML, write
|
|
165
|
+
``image: "${image:-}"`` so a value beginning with ``*`` doesn't get
|
|
166
|
+
parsed as an alias reference.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
text: The text containing zero or more placeholders.
|
|
170
|
+
vars: Flat ``{name: stringified_value}`` map. Empty/None falls back
|
|
171
|
+
to a no-op when no placeholders are present in ``text``.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
``text`` with every recognized placeholder replaced.
|
|
175
|
+
|
|
176
|
+
Raises:
|
|
177
|
+
UnsetVarError: A strict ``${name}`` placeholder had no
|
|
178
|
+
corresponding entry in ``vars``.
|
|
179
|
+
"""
|
|
180
|
+
if not vars and "${" not in text:
|
|
181
|
+
return text
|
|
182
|
+
|
|
183
|
+
def _replace(m: re.Match[str]) -> str:
|
|
184
|
+
name = m.group(1)
|
|
185
|
+
default = m.group(2)
|
|
186
|
+
if name in vars:
|
|
187
|
+
return vars[name]
|
|
188
|
+
if default is not None:
|
|
189
|
+
return default
|
|
190
|
+
raise UnsetVarError(name)
|
|
191
|
+
|
|
192
|
+
return _VAR_RE.sub(_replace, text)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def resolve_input_path(path: Path, config_dir: Path | None) -> Path:
|
|
196
|
+
"""Resolve a relative input path by trying cwd first, then the config directory.
|
|
197
|
+
|
|
198
|
+
Used to make config file entries portable: a relative input path like
|
|
199
|
+
``pipelines/foo.yaml`` is tried against the cwd first (preserving existing
|
|
200
|
+
behavior), then against the config file's directory as a fallback.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
path: Input path to resolve.
|
|
204
|
+
config_dir: Directory of the config file. If ``None``, path is returned unchanged.
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
The resolved absolute path, or the original path if nothing matched.
|
|
208
|
+
"""
|
|
209
|
+
if config_dir is None or path.is_absolute() or path.exists():
|
|
210
|
+
return path
|
|
211
|
+
candidate = config_dir / path
|
|
212
|
+
return candidate.resolve() if candidate.exists() else path
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# =============================================================================
|
|
216
|
+
# Dict merge helpers
|
|
217
|
+
# =============================================================================
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def apply_defaults(
|
|
221
|
+
entries: dict[str, Any] | list[dict[str, Any]],
|
|
222
|
+
defaults: dict[str, Any],
|
|
223
|
+
) -> dict[str, Any] | list[dict[str, Any]]:
|
|
224
|
+
"""Shallow-merge *defaults* into *entries* (entry values take precedence).
|
|
225
|
+
|
|
226
|
+
Works on a single dict, a list of dicts, or a dict-of-dicts (keyed entries).
|
|
227
|
+
For a dict-of-dicts, keys starting with ``_`` are excluded from merging
|
|
228
|
+
(they are metadata like ``_defaults`` itself).
|
|
229
|
+
|
|
230
|
+
Args:
|
|
231
|
+
entries: The entries to merge defaults into.
|
|
232
|
+
defaults: Default values (overridden by entry values).
|
|
233
|
+
|
|
234
|
+
Returns:
|
|
235
|
+
Merged result in the same shape as *entries*.
|
|
236
|
+
"""
|
|
237
|
+
if isinstance(entries, list):
|
|
238
|
+
return [{**defaults, **item} if isinstance(item, dict) else item for item in entries]
|
|
239
|
+
return {**defaults, **entries}
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# =============================================================================
|
|
243
|
+
# Digest Utilities
|
|
244
|
+
# =============================================================================
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def compute_text_digest(text: str) -> str:
    """Hash raw text with SHA256.

    Args:
        text: The text to hash; encoded as UTF-8 before hashing.

    Returns:
        Lowercase hex digest string.
    """
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def compute_spec_digest(spec: dict[str, Any]) -> str:
    """Derive a SHA256 digest for a component spec.

    The spec is serialized with ``dump_yaml`` using sorted keys so that key
    order does not influence the digest, then hashed with
    ``compute_text_digest``.

    Args:
        spec: The component spec dict.

    Returns:
        Hex digest string.
    """
    canonical_yaml = dump_yaml(spec, sort_keys=True)
    digest = compute_text_digest(canonical_yaml)
    return digest
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# Type alias for task processor callback
|
|
274
|
+
# Receives (task_name, task_data, path, base_dir, recursive_params) and returns processed task_data.
|
|
275
|
+
TaskProcessor = Callable[[str, dict[str, Any], str, Path | None, dict[str, Any] | None], dict[str, Any]]
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def is_subgraph_spec(spec: dict[str, Any] | None) -> bool:
|
|
279
|
+
"""Check if a spec contains a subgraph (has implementation.graph)."""
|
|
280
|
+
if not spec:
|
|
281
|
+
return False
|
|
282
|
+
return "graph" in spec.get("implementation", {})
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def is_graph_task(task_data: dict[str, Any]) -> bool:
    """Tell whether a task's componentRef points at a subgraph.

    Args:
        task_data: The task dict to check.

    Returns:
        True if ``task_data["componentRef"]`` is a dict whose ``spec``
        satisfies ``is_subgraph_spec``; False otherwise, including when
        ``componentRef`` is missing or not a dict.
    """
    ref = task_data.get("componentRef")
    return isinstance(ref, dict) and is_subgraph_spec(ref.get("spec", {}))
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def get_component_ref_info(component_ref: dict[str, Any]) -> tuple[str, str]:
    """Pull the component name and digest out of a componentRef.

    Args:
        component_ref: The componentRef dict. The name is read from
            ``spec.name`` and the digest from the top-level ``digest`` key.

    Returns:
        Tuple of (name, digest); either element is ``"unknown"`` when its
        key is absent.
    """
    spec = component_ref.get("spec", {})
    return spec.get("name", "unknown"), component_ref.get("digest", "unknown")
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _strip_internal_annotations(spec: dict[str, Any]) -> None:
    """Delete every top-level key of *spec* that starts with an underscore.

    Such keys (e.g. ``_source_dir``, ``_recursive_params``) are used during
    traversal and must not leak into the final output. Only the top level is
    touched; nested dicts are left alone. The spec is mutated in place.
    """
    # Collect first: a dict must not change size while being iterated.
    doomed = tuple(filter(lambda key: key.startswith("_"), spec))
    for key in doomed:
        del spec[key]
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def _extract_source_dir(spec: dict[str, Any], fallback: Path | None) -> Path | None:
|
|
325
|
+
"""Extract and remove _source_dir annotation from a spec.
|
|
326
|
+
|
|
327
|
+
When a component is loaded from a local file, _source_dir is set to the
|
|
328
|
+
directory containing that file. This allows nested file:// references to
|
|
329
|
+
be resolved relative to the file they appear in, not the top-level pipeline.
|
|
330
|
+
"""
|
|
331
|
+
source_dir = spec.pop("_source_dir", None)
|
|
332
|
+
if source_dir is not None:
|
|
333
|
+
return Path(source_dir)
|
|
334
|
+
return fallback
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _extract_recursive_params(
|
|
338
|
+
spec: dict[str, Any], fallback: dict[str, Any] | None,
|
|
339
|
+
) -> dict[str, Any] | None:
|
|
340
|
+
"""Extract and remove _recursive_params annotation from a spec.
|
|
341
|
+
|
|
342
|
+
When recursive context is active, _recursive_params carries the accumulated
|
|
343
|
+
template parameters for this subtree. Works like _source_dir: the value is
|
|
344
|
+
consumed here and threaded through the recursive traversal.
|
|
345
|
+
"""
|
|
346
|
+
return spec.pop("_recursive_params", fallback)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _resolve_subgraph_task(
    task_name: str,
    task_data: dict[str, Any],
    task_processor: TaskProcessor,
    base_dir: Path | None,
    recursive_params: dict[str, Any] | None,
) -> dict[str, Any]:
    """Recurse into a subgraph task and rebuild its componentRef if it changed.

    The caller must have established that ``task_data`` is a graph task.
    Returns ``task_data`` itself when the nested traversal left the spec equal
    to the original, otherwise a shallow copy with a fresh ``componentRef``.
    """
    component_ref = task_data["componentRef"]
    nested_spec = component_ref.get("spec", {})
    nested_name = component_ref.get("name", task_name)
    # Both extractors pop their annotation off nested_spec, so the subtree
    # context is consumed exactly once at this recursion boundary.
    nested_base_dir = _extract_source_dir(nested_spec, base_dir)
    nested_params = _extract_recursive_params(nested_spec, recursive_params)

    resolved_nested_spec = traverse_pipeline_tasks(
        nested_spec, nested_name, task_processor, nested_base_dir, nested_params
    )
    _strip_internal_annotations(resolved_nested_spec)

    if resolved_nested_spec != nested_spec:
        updated_task = dict(task_data)
        # Use spec name as fallback, compute digest if not present
        updated_task["componentRef"] = {
            "name": component_ref.get("name") or nested_spec.get("name", ""),
            "digest": component_ref.get("digest") or compute_spec_digest(resolved_nested_spec),
            "spec": resolved_nested_spec,
        }
        return updated_task
    return task_data


def traverse_pipeline_tasks(
    spec: dict[str, Any],
    parent_name: str,
    task_processor: TaskProcessor,
    base_dir: Path | None = None,
    recursive_params: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Traverse a pipeline/component spec and process each task recursively.

    This function walks through implementation.graph.tasks. For each task:
    - If it's a subgraph (has componentRef with nested graph), recurse into it
      without calling task_processor on the task itself.
    - Otherwise, call task_processor; if the processed task turns out to be a
      subgraph, recurse into that result as well.

    When a nested spec has a '_source_dir' annotation, base_dir is updated for
    that subtree so nested file:// references resolve relative to the loaded
    file. '_recursive_params' is handled the same way for template parameters.
    Both annotations are popped from the nested spec in place, and remaining
    underscore-prefixed keys are stripped from resolved specs.

    Args:
        spec: The component/pipeline spec with implementation.graph.tasks structure.
        parent_name: Name prefix for path display (e.g., pipeline name).
        task_processor: Callback to process non-subgraph tasks.
            Receives (task_name, task_data, path, base_dir, recursive_params)
            and returns the processed task dict.
        base_dir: Base directory for resolving relative file paths.
        recursive_params: Accumulated template parameters for recursive context.

    Returns:
        ``spec`` itself when it has no tasks; otherwise a shallow copy whose
        implementation.graph.tasks holds the processed tasks (including
        nested subgraph tasks).
    """
    implementation = spec.get("implementation", {})
    graph = implementation.get("graph", {})
    tasks = graph.get("tasks", {})

    if not tasks:
        return spec

    processed_tasks = {}
    for task_name, task_data in tasks.items():
        if is_graph_task(task_data):
            # Subgraph: recurse into it without processing the task itself.
            processed_task = _resolve_subgraph_task(
                task_name, task_data, task_processor, base_dir, recursive_params
            )
        else:
            path = f"{parent_name}.{task_name}" if parent_name else task_name
            # Process non-subgraph tasks, passing current base_dir and recursive params
            processed_task = task_processor(task_name, task_data, path, base_dir, recursive_params)

            if is_graph_task(processed_task):
                # Processing created a subgraph, so recurse into it.
                processed_task = _resolve_subgraph_task(
                    task_name, processed_task, task_processor, base_dir, recursive_params
                )
            else:
                # Strip internal annotations from non-subgraph specs (no nested tasks to resolve)
                cr = processed_task.get("componentRef")
                if isinstance(cr, dict):
                    s = cr.get("spec")
                    if isinstance(s, dict):
                        _strip_internal_annotations(s)

        processed_tasks[task_name] = processed_task

    # Rebuild the spec with processed tasks; copy each level so the caller's
    # spec/implementation/graph dicts are not rebound.
    result = dict(spec)
    result["implementation"] = dict(implementation)
    result["implementation"]["graph"] = dict(graph)
    result["implementation"]["graph"]["tasks"] = processed_tasks
    return result
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def parse_yaml_string(yaml_content, logger: Logger | None = None):
    """
    Parse a YAML string into a data structure.

    Parsing uses ``yaml.safe_load``. Any exception raised while parsing is
    logged (message and traceback) and swallowed.

    Args:
        yaml_content: YAML string content
        logger: Logger for error reporting; the default logger is used when
            omitted.

    Returns:
        Parsed data structure or None if parsing fails
    """
    log = logger or get_default_logger()

    # NOTE(review): this registers an OrderedDict representer on PyYAML's
    # global default Dumper on every call. It affects later dumps, not this
    # parse, so confirm whether callers rely on the side effect.
    yaml.add_representer(
        OrderedDict,
        lambda dumper, data: dumper.represent_dict(data.items()),
    )

    try:
        parsed = yaml.safe_load(yaml_content)
    except Exception as e:
        import traceback
        log.error(f"YAML parsing error: {e}")
        log.error(f"Traceback: {traceback.format_exc()}")
        return None
    return parsed
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
class _LiteralBlockDumper(yaml.SafeDumper):
    """YAML dumper that uses literal block style (|) for multiline strings."""


def _literal_str_representer(dumper: yaml.SafeDumper, data: str) -> yaml.ScalarNode:
    """Represent *data* as a YAML string, using ``|`` style when it contains a newline."""
    # ``None`` asks for represent_scalar's default style for single-line strings.
    style = '|' if '\n' in data else None
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=style)


# Registered on the subclass only, so other dumpers keep their own str handling.
_LiteralBlockDumper.add_representer(str, _literal_str_representer)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def dump_yaml(data: dict[str, Any], sort_keys: bool = False, width: int | None = None) -> str:
    """
    Serialize a data structure to a YAML string with consistent formatting.

    Output is block style (``default_flow_style=False``) with unicode
    characters emitted as-is, and multiline strings rendered using literal
    block style (|) via ``_LiteralBlockDumper``.

    Args:
        data: Dictionary to serialize to YAML
        sort_keys: Whether to sort dictionary keys (default: False)
        width: Line width passed through to ``yaml.dump`` (default: None).
            NOTE(review): PyYAML appears to treat ``None`` as "use the
            library default width", not "no limit" — confirm before relying
            on unwrapped output.

    Returns:
        YAML string
    """
    options = {
        "default_flow_style": False,
        "sort_keys": sort_keys,
        "allow_unicode": True,
        "width": width,
    }
    return yaml.dump(data, Dumper=_LiteralBlockDumper, **options)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def get_version_from_data(data):
    """
    Read the version out of a parsed YAML structure.

    ``metadata.annotations.version`` is preferred; a top-level ``version``
    key is the backward-compatible fallback.

    Args:
        data: Dictionary containing the parsed YAML structure

    Returns:
        The version converted with ``str()``, or None if *data* is empty or
        neither location has a ``version`` key
    """
    if not data:
        return None

    annotations = (data.get('metadata') or {}).get('annotations')

    # Preferred location first, then the legacy top-level key.
    for holder in (annotations, data):
        if holder and 'version' in holder:
            return str(holder['version'])
    return None
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def get_version_component(parts, index, default=0):
    """
    Read one version component as an int, tolerating bad input.

    Args:
        parts: List of version components
        index: Index to retrieve
        default: Value returned when the component is missing or cannot be
            converted with ``int()``

    Returns:
        Integer version component or default
    """
    try:
        if index < len(parts):
            return int(parts[index])
    except (ValueError, TypeError, IndexError):
        # Non-numeric component, unusable ``parts``/``index``, or an
        # out-of-range negative index: fall through to the default.
        pass
    return default
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def compare_versions(a: str, b: str) -> int:
    """Compare two dotted version strings component-wise.

    Both versions are split on ``.`` and compared position by position via
    :func:`get_version_component`, so a missing or non-numeric component
    counts as ``0`` (``1.0.1`` is greater than ``1.0``).

    Args:
        a: First version string (e.g. "1.2.3").
        b: Second version string (e.g. "1.2").

    Returns:
        -1 if a < b, 0 if a == b, 1 if a > b.
    """
    left = a.split(".")
    right = b.split(".")
    for position in range(max(len(left), len(right))):
        lhs = get_version_component(left, position)
        rhs = get_version_component(right, position)
        if lhs != rhs:
            # The first differing component decides the ordering.
            return 1 if lhs > rhs else -1
    return 0
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def check_versions(local_version, latest_version, check_precedence=False):
    """Decide whether a version update should proceed.

    Thin wrapper around :func:`compare_versions` kept for backward
    compatibility.

    Args:
        local_version: The local version string.
        latest_version: The latest published version (or None if not found).
        check_precedence: If True, proceed only when *local* is strictly
            newer. If False (default), proceed whenever the versions differ.

    Returns:
        bool: True if should proceed with update, False if should skip.
        Always True when *latest_version* is falsy.
    """
    if not latest_version:
        return True

    ordering = compare_versions(local_version, latest_version)
    return ordering > 0 if check_precedence else ordering != 0
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
# =============================================================================
|
|
625
|
+
# Git info collection
|
|
626
|
+
# =============================================================================
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
def get_git_root(directory: Path) -> Path | None:
|
|
630
|
+
"""Find the git repository root for a directory."""
|
|
631
|
+
try:
|
|
632
|
+
result = subprocess.run(
|
|
633
|
+
["git", "rev-parse", "--show-toplevel"],
|
|
634
|
+
cwd=str(directory), capture_output=True, text=True, timeout=5,
|
|
635
|
+
)
|
|
636
|
+
if result.returncode == 0:
|
|
637
|
+
return Path(result.stdout.strip())
|
|
638
|
+
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
|
639
|
+
pass
|
|
640
|
+
return None
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def get_git_info(directory: Path, logger: Logger | None = None) -> dict[str, str]:
    """Collect git metadata for annotations.

    Uses subprocess git commands to avoid requiring gitpython.
    The returned dict includes a ``_git_root`` key (absolute path to the
    repository root) so callers can compute relative paths without a
    second subprocess call.  This key is prefixed with ``_`` to signal
    it is not a component annotation and should not be persisted.

    Resolution order for each field:

    1. The local git checkout (tracking branch first, then ``origin`` /
       local branch / local SHA as fallbacks).
    2. CI environment variables, via ``_fill_from_ci_env``.

    A detached checkout reports its branch as the literal ``HEAD``.  That
    value is only used for ``git_remote_branch`` when the CI environment
    cannot supply a real branch name.

    Args:
        directory: Directory inside the work tree to inspect; also used as
            the working directory for every git command.
        logger: Optional logger.  When given, resolved values are logged and
            warnings are emitted for unavailable git / missing fields.

    Returns:
        A dict that may contain ``_git_root``, ``git_relative_dir``,
        ``git_local_branch``, ``git_local_sha``, ``git_remote_branch``,
        ``git_remote_url`` and ``git_remote_sha``.  Keys that could not be
        resolved are simply absent.
    """
    info: dict[str, str] = {}

    def _git(*args: str):
        """Run ``git <args>`` in *directory* (captured text output, 5 s timeout)."""
        return subprocess.run(
            ["git", *args],
            cwd=str(directory), capture_output=True, text=True, timeout=5,
        )

    try:
        # Find git root
        result = _git("rev-parse", "--show-toplevel")
        if result.returncode != 0:
            if logger:
                stderr = result.stderr.strip() if result.stderr else "unknown reason"
                logger.warn(f"⚠️ Not a git repository ({stderr}). "
                            "Will try CI environment variables.")
        else:
            git_root = Path(result.stdout.strip())
            info["_git_root"] = str(git_root)

            # git_relative_dir
            try:
                rel_dir = directory.resolve().relative_to(git_root)
                info["git_relative_dir"] = rel_dir.as_posix()
            except ValueError:
                # directory does not resolve to a location under git_root
                pass

            # git_local_branch ("HEAD" when the checkout is detached)
            result = _git("rev-parse", "--abbrev-ref", "HEAD")
            if result.returncode == 0:
                info["git_local_branch"] = result.stdout.strip()

            # git_local_sha
            result = _git("rev-parse", "HEAD")
            if result.returncode == 0:
                info["git_local_sha"] = result.stdout.strip()

            # Tracking branch info
            result = _git("rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}")
            if result.returncode == 0:
                tracking = result.stdout.strip()  # e.g., "origin/main"
                parts = tracking.split("/", 1)
                if len(parts) == 2:
                    remote_name, remote_branch = parts
                    info["git_remote_branch"] = remote_branch

                    # Remote URL
                    result = _git("remote", "get-url", remote_name)
                    if result.returncode == 0:
                        info["git_remote_url"] = result.stdout.strip()

                    # Remote SHA
                    result = _git("rev-parse", tracking)
                    if result.returncode == 0:
                        info["git_remote_sha"] = result.stdout.strip()

            # Fallback: if no tracking branch, use local sha/branch and origin URL
            if "git_remote_url" not in info:
                result = _git("remote", "get-url", "origin")
                if result.returncode == 0:
                    info["git_remote_url"] = result.stdout.strip()
            if "git_remote_sha" not in info and "git_local_sha" in info:
                info["git_remote_sha"] = info["git_local_sha"]
            # A detached checkout (the norm on CI agents) reports "HEAD" as
            # its branch.  Do not let that placeholder pre-empt the real
            # branch name the CI environment may provide below.
            if (
                "git_remote_branch" not in info
                and "git_local_branch" in info
                and info["git_local_branch"] != "HEAD"
            ):
                info["git_remote_branch"] = info["git_local_branch"]

    except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
        if logger:
            logger.warn(f"⚠️ Git not available ({type(e).__name__}: {e}). "
                        "Will try CI environment variables.")

    # Fallback: populate missing fields from CI environment variables
    _fill_from_ci_env(info)

    # Last resort: the CI environment had no branch either, so fall back to
    # whatever the local checkout reported (possibly the literal "HEAD").
    if "git_remote_branch" not in info and "git_local_branch" in info:
        info["git_remote_branch"] = info["git_local_branch"]

    # Normalize SSH git URLs to HTTPS (e.g. git@github.com:Org/repo.git -> https://github.com/Org/repo)
    if "git_remote_url" in info:
        info["git_remote_url"] = _normalize_git_url(info["git_remote_url"])

    # Log resolved git metadata and warn about missing fields
    if logger:
        logger.info(" Git metadata resolved:")
        logger.info(f" _git_root: {info.get('_git_root', '(not set)')}")
        logger.info(f" git_remote_sha: {info.get('git_remote_sha', '(not set)')}")
        logger.info(f" git_remote_branch: {info.get('git_remote_branch', '(not set)')}")
        logger.info(f" git_remote_url: {info.get('git_remote_url', '(not set)')}")

        missing = []
        if "_git_root" not in info:
            missing.append("git_root (needed for component_yaml_path)")
        if "git_remote_url" not in info:
            missing.append("git_remote_url")
        if "git_remote_sha" not in info:
            missing.append("git_remote_sha")
        if "git_remote_branch" not in info:
            missing.append("git_remote_branch")
        if missing:
            logger.warn(
                f"⚠️ Missing git metadata: {', '.join(missing)}. "
                "Published components will lack source links and transparency signals. "
                "Pass --git-remote-sha/--git-remote-branch/--git-remote-url or run from a git repo."
            )

    return info
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def set_component_yaml_path(rel_path: str, annotations: dict[str, str], *, overwrite: bool = True) -> None:
    """Record a repo-relative YAML path as directory + filename annotations.

    The path is split at its last ``/``: ``"a/b/comp.yaml"`` yields
    ``git_relative_dir="a/b"`` and ``component_yaml_path="comp.yaml"``.
    A path without any ``/`` (e.g. ``"comp.yaml"``) only sets
    ``component_yaml_path`` and leaves ``git_relative_dir`` untouched.

    Args:
        rel_path: Path of the component YAML relative to the repository root,
            using ``/`` separators.
        annotations: Annotation dict, updated in place.
        overwrite: If False, preserve existing values (setdefault semantics).
    """
    directory, slash, filename = rel_path.rpartition("/")
    if slash:
        updates = {"git_relative_dir": directory, "component_yaml_path": filename}
    else:
        updates = {"component_yaml_path": rel_path}

    for key, value in updates.items():
        if overwrite:
            annotations[key] = value
        else:
            annotations.setdefault(key, value)
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def normalize_annotation_paths(
    yaml_path: "str | Path",
    git_root: "str | Path",
    annotations: dict[str, str],
) -> None:
    """Normalize ``dockerfile_path`` and ``documentation_path`` to be relative to ``git_relative_dir``.

    Component authors may write path annotations relative to the YAML file's
    directory (e.g. ``../../../../dockerfiles/foo.Dockerfile``) or relative to
    ``git_relative_dir`` (e.g. ``dockerfiles/foo.Dockerfile``).  This function
    resolves each path using filesystem checks and re-expresses it relative to
    the final ``git_relative_dir``.

    Resolution order for each path annotation:

    1. Relative to ``git_relative_dir`` — if the file exists, leave the value
       as-is (already correct).
    2. Relative to the YAML file's parent directory — if the file exists,
       re-express it relative to ``git_relative_dir``.
    3. If neither resolves to an existing file, leave the value unchanged.

    Rewritten values always use ``/`` separators, regardless of platform, so
    they stay consistent with the other (POSIX-style) path annotations.

    This is a no-op when ``git_relative_dir`` is missing/empty or equals the
    YAML file's parent directory (the common case).

    Args:
        yaml_path: Filesystem path to the component YAML file.
        git_root: Filesystem path to the git repository root.
        annotations: The ``metadata.annotations`` dict (modified in place).
    """
    import os
    from pathlib import Path as _Path

    git_relative_dir = annotations.get("git_relative_dir")
    if not git_relative_dir:
        return

    yaml_parent = _Path(yaml_path).resolve().parent
    git_rel_dir_abs = (_Path(git_root) / git_relative_dir).resolve()

    # If git_relative_dir resolves to the YAML parent, paths are equivalent — skip
    if git_rel_dir_abs == yaml_parent:
        return

    for key in ("dockerfile_path", "documentation_path"):
        value = annotations.get(key)
        if not value:
            continue

        # 1. Already relative to git_relative_dir?
        if (git_rel_dir_abs / value).resolve().exists():
            continue  # already correct

        # 2. Relative to YAML parent dir?
        resolved_from_yaml = (yaml_parent / value).resolve()
        if not resolved_from_yaml.exists():
            continue  # 3. unresolvable — leave unchanged

        # Re-express relative to git_relative_dir.  Use os.path.relpath
        # rather than Path.relative_to so that files *above*
        # git_relative_dir produce ``../`` prefixed paths.
        try:
            normalized = os.path.relpath(str(resolved_from_yaml), str(git_rel_dir_abs))
        except ValueError:
            # On Windows relpath cannot relate paths on different drives;
            # keep the author's value rather than failing.
            continue

        # relpath returns OS-native separators; annotations are POSIX-style.
        annotations[key] = _Path(normalized).as_posix()
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
# CI environment variables probed for git metadata.  Each tuple is checked in
# order and the first variable with a non-empty value wins.  Covers Buildkite,
# GitHub Actions, and GitLab CI out of the box.  Wrapper packages can prepend
# additional CI-system-specific variables by monkey-patching these module
# attributes at import time.
#
# ``GITHUB_SERVER_URL`` is special-cased by ``_fill_from_ci_env``: it is only
# used when ``GITHUB_REPOSITORY`` is also set, and the two are joined with "/".
#
# NOTE(review): on GitLab, CI_REPOSITORY_URL may embed a job token in the URL —
# confirm the value is scrubbed before it is logged or published.
_CI_GIT_ROOT_VARS: tuple[str, ...] = ("BUILDKITE_BUILD_CHECKOUT_PATH", "GITHUB_WORKSPACE", "CI_PROJECT_DIR")
_CI_SHA_VARS: tuple[str, ...] = ("BUILDKITE_COMMIT", "GITHUB_SHA", "CI_COMMIT_SHA")
_CI_BRANCH_VARS: tuple[str, ...] = ("BUILDKITE_BRANCH", "GITHUB_REF_NAME", "CI_COMMIT_BRANCH")
_CI_REPO_URL_VARS: tuple[str, ...] = ("BUILDKITE_REPO", "GITHUB_SERVER_URL", "CI_REPOSITORY_URL")
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _normalize_git_url(url: str) -> str:
    """Normalize a git remote URL to a browsable HTTPS URL.

    Handles common formats:
    - ``git@github.com:Org/repo.git``             -> ``https://github.com/Org/repo``
    - ``ssh://git@github.com/Org/repo.git``       -> ``https://github.com/Org/repo``
    - ``ssh://git@host:2222/Org/repo.git``        -> ``https://host/Org/repo``
    - ``https://user:token@host/Org/repo.git``    -> ``https://host/Org/repo``
    - ``https://github.com/Org/repo.git``         -> ``https://github.com/Org/repo``
    - ``https://github.com/Org/repo``             -> unchanged

    The ``.git`` suffix is stripped so the result can be used directly to
    build ``/blob/{ref}/{path}`` links without an extra ``.removesuffix``.

    Any ``user[:password]@`` part is dropped: the result is logged and
    published as an annotation, and CI-provided clone URLs can carry an
    access token there.  An SSH port is dropped as well because it is
    meaningless for the HTTPS URL.

    Args:
        url: Remote URL as reported by git or a CI environment variable.

    Returns:
        The normalized URL; URLs in an unrecognized format are returned with
        only the ``.git`` suffix removed.
    """
    import re

    # SCP-style: user@host:path (the user is usually, but not always, "git")
    m = re.match(r"^[^@/:]+@([^:/]+):(.+)$", url)
    if m:
        return f"https://{m.group(1)}/{m.group(2)}".removesuffix(".git")

    # ssh://[user@]host[:port]/path — host may be a bracketed IPv6 literal
    m = re.match(r"^ssh://(?:[^@/]+@)?(\[[^\]]+\]|[^/:]+)(?::\d+)?/(.+)$", url)
    if m:
        return f"https://{m.group(1)}/{m.group(2)}".removesuffix(".git")

    # http(s)://user[:password]@host/path — never keep embedded credentials
    m = re.match(r"^(https?://)[^/@]*@(.+)$", url)
    if m:
        url = m.group(1) + m.group(2)

    return url.removesuffix(".git")
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def _fill_from_ci_env(info: dict[str, str]) -> None:
    """Populate git info fields that are still missing from CI environment variables.

    Fields already present in *info* are never overwritten.  For each missing
    field the corresponding module-level tuple (``_CI_GIT_ROOT_VARS``,
    ``_CI_SHA_VARS``, ``_CI_BRANCH_VARS``, ``_CI_REPO_URL_VARS``) is scanned
    in order and the first variable with a non-empty value is used.  The
    tuples are read at call time so they can be extended to support new CI
    systems.

    Args:
        info: Git info dict, updated in place.
    """
    import os

    env = os.environ

    plain_fields = (
        ("_git_root", _CI_GIT_ROOT_VARS),
        ("git_remote_sha", _CI_SHA_VARS),
        ("git_remote_branch", _CI_BRANCH_VARS),
    )
    for field, candidates in plain_fields:
        if field in info:
            continue
        first_set = next((v for v in map(env.get, candidates) if v), None)
        if first_set:
            info[field] = first_set

    if "git_remote_url" in info:
        return

    for name in _CI_REPO_URL_VARS:
        base = env.get(name)
        if not base:
            continue
        if name == "GITHUB_SERVER_URL":
            # GITHUB_SERVER_URL needs GITHUB_REPOSITORY appended
            repo = env.get("GITHUB_REPOSITORY", "")
            if not repo:
                continue
            base = f"{base}/{repo}"
        info["git_remote_url"] = base
        break
|