tangle-cli 0.0.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. tangle_cli/__init__.py +19 -0
  2. tangle_cli/api_cli.py +787 -0
  3. tangle_cli/api_schema.py +633 -0
  4. tangle_cli/api_transport.py +461 -0
  5. tangle_cli/args_container.py +244 -0
  6. tangle_cli/artifacts.py +293 -0
  7. tangle_cli/artifacts_cli.py +108 -0
  8. tangle_cli/cli.py +57 -0
  9. tangle_cli/cli_helpers.py +116 -0
  10. tangle_cli/cli_options.py +52 -0
  11. tangle_cli/client.py +677 -0
  12. tangle_cli/component_from_func.py +1856 -0
  13. tangle_cli/component_generator.py +298 -0
  14. tangle_cli/component_inspector.py +494 -0
  15. tangle_cli/component_publisher.py +921 -0
  16. tangle_cli/components_cli.py +269 -0
  17. tangle_cli/dynamic_discovery_client.py +296 -0
  18. tangle_cli/generated_model_extensions.py +405 -0
  19. tangle_cli/generated_runtime.py +43 -0
  20. tangle_cli/handler.py +96 -0
  21. tangle_cli/hydration_trust.py +222 -0
  22. tangle_cli/logger.py +166 -0
  23. tangle_cli/models.py +407 -0
  24. tangle_cli/module_bundler.py +662 -0
  25. tangle_cli/openapi/__init__.py +0 -0
  26. tangle_cli/openapi/codegen.py +1090 -0
  27. tangle_cli/openapi/parser.py +77 -0
  28. tangle_cli/pipeline_dehydrator.py +720 -0
  29. tangle_cli/pipeline_hydrator.py +1785 -0
  30. tangle_cli/pipeline_run_annotations.py +41 -0
  31. tangle_cli/pipeline_run_details.py +203 -0
  32. tangle_cli/pipeline_run_manager.py +1994 -0
  33. tangle_cli/pipeline_run_search.py +712 -0
  34. tangle_cli/pipeline_runner.py +620 -0
  35. tangle_cli/pipeline_runs_cli.py +584 -0
  36. tangle_cli/pipelines.py +581 -0
  37. tangle_cli/pipelines_cli.py +271 -0
  38. tangle_cli/published_components_cli.py +373 -0
  39. tangle_cli/py.typed +0 -0
  40. tangle_cli/quickstart.py +110 -0
  41. tangle_cli/secrets.py +156 -0
  42. tangle_cli/secrets_cli.py +269 -0
  43. tangle_cli/utils.py +942 -0
  44. tangle_cli/version_manager.py +470 -0
  45. tangle_cli-0.0.1a1.dist-info/METADATA +561 -0
  46. tangle_cli-0.0.1a1.dist-info/RECORD +48 -0
  47. tangle_cli-0.0.1a1.dist-info/WHEEL +4 -0
  48. tangle_cli-0.0.1a1.dist-info/entry_points.txt +3 -0
tangle_cli/utils.py ADDED
@@ -0,0 +1,942 @@
1
+ """
2
+ Generic utility functions for tangle-cli.
3
+
4
+ YAML parsing/dumping, version comparison, digest computation, git metadata
5
+ extraction, and pipeline-spec traversal.
6
+ """
7
+
8
+ import hashlib
9
+ import os
10
+ import re
11
+ import subprocess
12
+ from collections import OrderedDict
13
+ from collections.abc import Callable, Mapping
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ import yaml
18
+
19
+ from tangle_cli.logger import Logger, get_default_logger
20
+
21
+ # =============================================================================
22
+ # Generic Data Helpers
23
+ # =============================================================================
24
+
25
+
26
+ def _strip_text_from_graph(implementation: dict[str, Any]) -> None:
27
+ """Recursively remove raw component text from graph component references."""
28
+
29
+ graph = implementation.get("graph", {})
30
+ for task_data in graph.get("tasks", {}).values():
31
+ ref = task_data.get("componentRef")
32
+ if not ref:
33
+ continue
34
+ ref.pop("text", None)
35
+ spec = ref.get("spec", {})
36
+ nested_impl = spec.get("implementation")
37
+ if nested_impl and "graph" in nested_impl:
38
+ _strip_text_from_graph(nested_impl)
39
+
40
+
41
+ def add_official_prefix(name: str | None) -> str | None:
42
+ """Return the official component name variant used by registry searches."""
43
+
44
+ if name and not name.startswith("[Official]"):
45
+ return f"[Official] {name}"
46
+ return name
47
+
48
+
49
+ def _value_from_mapping_or_object(value: object, key: str, default: Any = None) -> Any:
50
+ """Read a field from a mapping, generated model, or attribute object."""
51
+
52
+ if isinstance(value, Mapping):
53
+ return value.get(key, default)
54
+
55
+ get = getattr(value, "get", None)
56
+ if callable(get):
57
+ return get(key, default)
58
+
59
+ to_dict = getattr(value, "to_dict", None)
60
+ if callable(to_dict):
61
+ data = to_dict()
62
+ if isinstance(data, Mapping):
63
+ return data.get(key, default)
64
+
65
+ return getattr(value, key, default)
66
+
67
+
68
+ def _optional_str(value: Any) -> str | None:
69
+ """Return *value* only when it is already a string."""
70
+
71
+ return value if isinstance(value, str) else None
72
+
73
+
74
+ # =============================================================================
75
+ # Numeric Helpers
76
+ # =============================================================================
77
+
78
+
79
def clamp(value: float, lower: float, upper: float) -> float:
    """Return *value* limited to the inclusive ``[lower, upper]`` range.

    The lower bound is applied first and the upper bound second, so when
    ``lower > upper`` the result is ``upper``.
    """
    raised = lower if lower > value else value
    return upper if upper < raised else raised
82
+
83
+
84
+ # =============================================================================
85
+ # Environment Helpers
86
+ # =============================================================================
87
+
88
# Lower-cased spellings that count as "on" for boolean-style environment
# variables across Tangle tooling.
_TRUTHY_ENV_VALUES = ("1", "true", "yes")


def tangle_verbose_enabled() -> bool:
    """Report whether ``TANGLE_VERBOSE`` is switched on in the environment.

    The variable's value is lower-cased and compared against ``"1"``,
    ``"true"`` and ``"yes"``; an unset variable counts as off. The module
    documents this as the shared check for verbose-only diagnostics in the
    API client, publisher, and hydrator.
    """
    raw_flag = os.environ.get("TANGLE_VERBOSE", "")
    return raw_flag.lower() in _TRUTHY_ENV_VALUES
100
+
101
+
102
+ # =============================================================================
103
+ # Component-Path Conventions
104
+ # =============================================================================
105
+
106
+
107
+ def find_documentation_path_for_yaml(yaml_path: Path) -> str | None:
108
+ """Return ``docs/<stem>.md`` next to a component YAML, if it exists.
109
+
110
+ Encodes the convention that a component YAML at ``foo/bar.yaml`` carries
111
+ its human-readable docs at ``foo/docs/bar.md``. Returns the absolute
112
+ path as a string, or ``None`` when no such file exists.
113
+ """
114
+ docs_path = yaml_path.parent / "docs" / f"{yaml_path.stem}.md"
115
+ return str(docs_path.resolve()) if docs_path.exists() else None
116
+
117
+
118
+ # =============================================================================
119
+ # String / Template Helpers
120
+ # =============================================================================
121
+
122
+ # Recognizes ``${name}`` or ``${name:-default}`` placeholders. The syntax
123
+ # is borrowed from POSIX parameter expansion for familiarity, but these
124
+ # placeholders have nothing to do with shells, processes, or environments
125
+ # — they're filled from an explicit ``vars`` dict, never from
126
+ # ``os.environ``. ``name`` follows Python identifier rules (letter or
127
+ # underscore start, then alphanumerics / underscores). ``default`` is
128
+ # everything up to the closing ``}`` and may be empty (``${name:-}``).
129
+ #
130
+ # Convention: prefer lowercase / snake_case ``name``s. Uppercase reads as
131
+ # an env-var reference and risks misleading readers about what's actually
132
+ # providing the values.
133
+ _VAR_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-([^}]*))?\}")
134
+
135
+
136
+ class UnsetVarError(KeyError):
137
+ """Raised when a strict ``${name}`` placeholder has no value and no default.
138
+
139
+ A ``KeyError`` subclass so existing ``except KeyError`` handlers keep
140
+ working; the dedicated type lets callers distinguish unresolved
141
+ placeholders from incidental ``KeyError``s if they want a clearer
142
+ error message.
143
+ """
144
+
145
+
146
+ def expand_vars(text: str, vars: dict[str, str]) -> str:
147
+ """Expand ``${name}`` / ``${name:-default}`` placeholders in ``text``.
148
+
149
+ Mirrors ``os.path.expandvars`` in syntax, but reads from an explicit
150
+ ``vars`` dict instead of ``os.environ`` — these are *not* environment
151
+ variables, despite the syntax similarity. Lowercase / snake_case
152
+ names are conventional here (uppercase would mislead readers who treat
153
+ the same syntax as env-var interpolation in shells/Docker/etc.).
154
+ Recognized forms:
155
+
156
+ * ``${name}`` — strict; raises :class:`UnsetVarError` (a ``KeyError``
157
+ subclass) if ``name`` is missing from ``vars``.
158
+ * ``${name:-default}`` — falls back to the literal ``default`` text when
159
+ ``name`` is missing. ``${name:-}`` substitutes the empty string.
160
+
161
+ Substitution is purely textual; values are inserted verbatim. Callers
162
+ that interpolate into structured formats (YAML, JSON, shell commands,
163
+ …) should quote the placeholder appropriately so unusual values can't
164
+ break the surrounding syntax — e.g. for YAML, write
165
+ ``image: "${image:-}"`` so a value beginning with ``*`` doesn't get
166
+ parsed as an alias reference.
167
+
168
+ Args:
169
+ text: The text containing zero or more placeholders.
170
+ vars: Flat ``{name: stringified_value}`` map. Empty/None falls back
171
+ to a no-op when no placeholders are present in ``text``.
172
+
173
+ Returns:
174
+ ``text`` with every recognized placeholder replaced.
175
+
176
+ Raises:
177
+ UnsetVarError: A strict ``${name}`` placeholder had no
178
+ corresponding entry in ``vars``.
179
+ """
180
+ if not vars and "${" not in text:
181
+ return text
182
+
183
+ def _replace(m: re.Match[str]) -> str:
184
+ name = m.group(1)
185
+ default = m.group(2)
186
+ if name in vars:
187
+ return vars[name]
188
+ if default is not None:
189
+ return default
190
+ raise UnsetVarError(name)
191
+
192
+ return _VAR_RE.sub(_replace, text)
193
+
194
+
195
+ def resolve_input_path(path: Path, config_dir: Path | None) -> Path:
196
+ """Resolve a relative input path by trying cwd first, then the config directory.
197
+
198
+ Used to make config file entries portable: a relative input path like
199
+ ``pipelines/foo.yaml`` is tried against the cwd first (preserving existing
200
+ behavior), then against the config file's directory as a fallback.
201
+
202
+ Args:
203
+ path: Input path to resolve.
204
+ config_dir: Directory of the config file. If ``None``, path is returned unchanged.
205
+
206
+ Returns:
207
+ The resolved absolute path, or the original path if nothing matched.
208
+ """
209
+ if config_dir is None or path.is_absolute() or path.exists():
210
+ return path
211
+ candidate = config_dir / path
212
+ return candidate.resolve() if candidate.exists() else path
213
+
214
+
215
+ # =============================================================================
216
+ # Dict merge helpers
217
+ # =============================================================================
218
+
219
+
220
+ def apply_defaults(
221
+ entries: dict[str, Any] | list[dict[str, Any]],
222
+ defaults: dict[str, Any],
223
+ ) -> dict[str, Any] | list[dict[str, Any]]:
224
+ """Shallow-merge *defaults* into *entries* (entry values take precedence).
225
+
226
+ Works on a single dict, a list of dicts, or a dict-of-dicts (keyed entries).
227
+ For a dict-of-dicts, keys starting with ``_`` are excluded from merging
228
+ (they are metadata like ``_defaults`` itself).
229
+
230
+ Args:
231
+ entries: The entries to merge defaults into.
232
+ defaults: Default values (overridden by entry values).
233
+
234
+ Returns:
235
+ Merged result in the same shape as *entries*.
236
+ """
237
+ if isinstance(entries, list):
238
+ return [{**defaults, **item} if isinstance(item, dict) else item for item in entries]
239
+ return {**defaults, **entries}
240
+
241
+
242
+ # =============================================================================
243
+ # Digest Utilities
244
+ # =============================================================================
245
+
246
+
247
def compute_text_digest(text: str) -> str:
    """Hash *text* with SHA-256.

    Args:
        text: The text to hash; it is encoded as UTF-8 first.

    Returns:
        The digest as a hexadecimal string.
    """
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
257
+
258
+
259
def compute_spec_digest(spec: dict[str, Any]) -> str:
    """Compute a SHA-256 digest for a component spec.

    The spec is serialized with :func:`dump_yaml` using sorted keys, so the
    result does not depend on key insertion order, and the resulting text is
    hashed with :func:`compute_text_digest`.

    Args:
        spec: The component spec dict.

    Returns:
        Hex digest string.
    """
    return compute_text_digest(dump_yaml(spec, sort_keys=True))
271
+
272
+
273
+ # Type alias for task processor callback
274
+ # Receives (task_name, task_data, path, base_dir, recursive_params) and returns processed task_data.
275
+ TaskProcessor = Callable[[str, dict[str, Any], str, Path | None, dict[str, Any] | None], dict[str, Any]]
276
+
277
+
278
+ def is_subgraph_spec(spec: dict[str, Any] | None) -> bool:
279
+ """Check if a spec contains a subgraph (has implementation.graph)."""
280
+ if not spec:
281
+ return False
282
+ return "graph" in spec.get("implementation", {})
283
+
284
+
285
def is_graph_task(task_data: dict[str, Any]) -> bool:
    """Tell whether a task's ``componentRef`` embeds a subgraph spec.

    Args:
        task_data: The task dict to check.

    Returns:
        True if ``componentRef`` is a dict whose ``spec`` satisfies
        :func:`is_subgraph_spec`. False otherwise, including when the
        reference is missing or is not a dict.
    """
    component_ref = task_data.get("componentRef")
    return isinstance(component_ref, dict) and is_subgraph_spec(component_ref.get("spec", {}))
298
+
299
+
300
def get_component_ref_info(component_ref: dict[str, Any]) -> tuple[str, str]:
    """Extract name and digest from a componentRef.

    Args:
        component_ref: The componentRef dict. ``spec.name`` and ``digest``
            are read when present.

    Returns:
        Tuple of ``(name, digest)``. Each element falls back to
        ``"unknown"`` when the corresponding field is absent.
    """
    # ``or {}`` tolerates a ``spec`` key that is present but null.
    spec = component_ref.get("spec") or {}
    name = spec.get("name", "unknown")
    digest = component_ref.get("digest", "unknown")
    return name, digest
312
+
313
+
314
+ def _strip_internal_annotations(spec: dict[str, Any]) -> None:
315
+ """Remove all internal underscore-prefixed keys from a spec dict.
316
+
317
+ These keys (e.g. ``_source_dir``, ``_recursive_params``) are used during
318
+ traversal and must not leak into the final output.
319
+ """
320
+ for key in [k for k in spec if k.startswith("_")]:
321
+ del spec[key]
322
+
323
+
324
+ def _extract_source_dir(spec: dict[str, Any], fallback: Path | None) -> Path | None:
325
+ """Extract and remove _source_dir annotation from a spec.
326
+
327
+ When a component is loaded from a local file, _source_dir is set to the
328
+ directory containing that file. This allows nested file:// references to
329
+ be resolved relative to the file they appear in, not the top-level pipeline.
330
+ """
331
+ source_dir = spec.pop("_source_dir", None)
332
+ if source_dir is not None:
333
+ return Path(source_dir)
334
+ return fallback
335
+
336
+
337
+ def _extract_recursive_params(
338
+ spec: dict[str, Any], fallback: dict[str, Any] | None,
339
+ ) -> dict[str, Any] | None:
340
+ """Extract and remove _recursive_params annotation from a spec.
341
+
342
+ When recursive context is active, _recursive_params carries the accumulated
343
+ template parameters for this subtree. Works like _source_dir: the value is
344
+ consumed here and threaded through the recursive traversal.
345
+ """
346
+ return spec.pop("_recursive_params", fallback)
347
+
348
+
349
def traverse_pipeline_tasks(
    spec: dict[str, Any],
    parent_name: str,
    task_processor: TaskProcessor,
    base_dir: Path | None = None,
    recursive_params: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Walk ``implementation.graph.tasks`` of a spec and process every task.

    For each task:

    - A task that already is a subgraph (its ``componentRef`` embeds a spec
      with a nested graph) is not handed to ``task_processor``; the
      traversal recurses into the nested spec instead.
    - Any other task is passed to ``task_processor``. If the processed
      result is a subgraph, the traversal recurses into it as well;
      otherwise underscore-prefixed annotations are stripped from its
      embedded spec, if it has one.

    Two annotations are consumed at recursion boundaries:

    - ``_source_dir`` (set when a component was loaded from a local file)
      replaces ``base_dir`` for that subtree, so nested ``file://``
      references resolve relative to the loaded file.
    - ``_recursive_params`` replaces ``recursive_params`` for that subtree
      and is forwarded to the task processor.

    Args:
        spec: The component/pipeline spec with an
            ``implementation.graph.tasks`` structure.
        parent_name: Name prefix for path display (e.g., pipeline name).
        task_processor: Callback for non-subgraph tasks. Receives
            ``(task_name, task_data, path, base_dir, recursive_params)``
            and returns the processed task dict.
        base_dir: Base directory for resolving relative file paths.
        recursive_params: Accumulated template parameters for recursive
            context.

    Returns:
        *spec* itself when it has no tasks; otherwise a shallow copy whose
        ``implementation.graph.tasks`` holds the processed tasks.
    """
    implementation = spec.get("implementation", {})
    graph = implementation.get("graph", {})
    tasks = graph.get("tasks", {})
    if not tasks:
        return spec

    def _resolve_subgraph(task_name: str, task: dict[str, Any]) -> dict[str, Any]:
        """Recurse into the subgraph held by *task*; rebuild the task only if it changed."""
        component_ref = task["componentRef"]
        nested_spec = component_ref.get("spec", {})
        # Arguments are evaluated left to right, so both annotations are
        # popped off ``nested_spec`` before the recursive call starts.
        resolved = traverse_pipeline_tasks(
            nested_spec,
            component_ref.get("name", task_name),
            task_processor,
            _extract_source_dir(nested_spec, base_dir),
            _extract_recursive_params(nested_spec, recursive_params),
        )
        _strip_internal_annotations(resolved)

        if resolved != nested_spec:
            rebuilt = dict(task)
            # Fall back to the spec name; compute a digest only if none is present.
            rebuilt["componentRef"] = {
                "name": component_ref.get("name") or nested_spec.get("name", ""),
                "digest": component_ref.get("digest") or compute_spec_digest(resolved),
                "spec": resolved,
            }
            return rebuilt
        return task

    processed_tasks = {}
    for task_name, task_data in tasks.items():
        path = f"{parent_name}.{task_name}" if parent_name else task_name

        if is_graph_task(task_data):
            # Existing subgraph: descend without invoking the processor.
            processed_tasks[task_name] = _resolve_subgraph(task_name, task_data)
            continue

        outcome = task_processor(task_name, task_data, path, base_dir, recursive_params)
        if is_graph_task(outcome):
            # Processing produced a subgraph: descend into it too.
            outcome = _resolve_subgraph(task_name, outcome)
        else:
            # No nested tasks to resolve; just drop internal annotations.
            ref = outcome.get("componentRef")
            embedded = ref.get("spec") if isinstance(ref, dict) else None
            if isinstance(embedded, dict):
                _strip_internal_annotations(embedded)
        processed_tasks[task_name] = outcome

    # Rebuild the spec on shallow copies so the input containers are not rewired.
    new_graph = dict(graph)
    new_graph["tasks"] = processed_tasks
    new_implementation = dict(implementation)
    new_implementation["graph"] = new_graph
    result = dict(spec)
    result["implementation"] = new_implementation
    return result
463
+
464
+
465
def parse_yaml_string(yaml_content, logger: Logger | None = None):
    """
    Parse a YAML string into a data structure.

    Args:
        yaml_content: YAML string content
        logger: Logger used to report parse failures; the default logger is
            used when omitted.

    Returns:
        Parsed data structure or None if parsing fails
    """
    log = logger or get_default_logger()

    def _ordered_dict_as_mapping(dumper, data):
        return dumper.represent_dict(data.items())

    # NOTE(review): this registers an OrderedDict representer through the
    # module-level ``yaml.add_representer`` on every call. It affects
    # dumping, not the ``safe_load`` below.
    yaml.add_representer(OrderedDict, _ordered_dict_as_mapping)

    try:
        parsed = yaml.safe_load(yaml_content)
    except Exception as exc:
        import traceback
        log.error(f"YAML parsing error: {exc}")
        log.error(f"Traceback: {traceback.format_exc()}")
        return None
    return parsed
490
+
491
+
492
class _LiteralBlockDumper(yaml.SafeDumper):
    """``SafeDumper`` variant that emits multiline strings in literal block style (|)."""


def _literal_str_representer(dumper: yaml.SafeDumper, data: str) -> yaml.ScalarNode:
    """Represent *data* as a YAML string, using ``|`` style when it contains a newline."""
    # ``style=None`` is ``represent_scalar``'s own default, so single-line
    # strings are emitted exactly as they would be without a style argument.
    block_style = '|' if '\n' in data else None
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=block_style)


# Registered on the subclass only.
_LiteralBlockDumper.add_representer(str, _literal_str_representer)
504
+
505
+
506
def dump_yaml(data: dict[str, Any], sort_keys: bool = False, width: int | None = None) -> str:
    """
    Serialize a data structure to a YAML string with consistent formatting.

    Output is block style (no flow collections), unicode is written as-is,
    and multiline strings are rendered using literal block style (|).

    Args:
        data: Dictionary to serialize to YAML
        sort_keys: Whether to sort dictionary keys (default: False)
        width: Line width limit passed to the dumper (default: None)

    Returns:
        YAML string
    """
    dump_options = {
        "Dumper": _LiteralBlockDumper,
        "default_flow_style": False,
        "sort_keys": sort_keys,
        "allow_unicode": True,
        "width": width,
    }
    return yaml.dump(data, **dump_options)
524
+
525
+
526
def get_version_from_data(data):
    """
    Extract the version from a parsed YAML structure.

    ``metadata.annotations.version`` is the preferred location; the
    top-level ``version`` key is consulted as a backward-compatible
    fallback.

    Args:
        data: Dictionary containing the parsed YAML structure

    Returns:
        The version converted with ``str()``, or None if no version is found
        (or *data* is empty/None)
    """
    if not data:
        return None

    # Preferred location: metadata.annotations.version
    metadata = data.get('metadata')
    annotations = metadata.get('annotations') if metadata else None
    if annotations and 'version' in annotations:
        return str(annotations['version'])

    # Backward-compatible fallback: top-level version
    return str(data['version']) if 'version' in data else None
554
+
555
+
556
def get_version_component(parts, index, default=0):
    """
    Return the version component at *index* as an int, or *default*.

    Args:
        parts: List of version components
        index: Index to retrieve
        default: Value returned when the component is missing or cannot be
            converted with ``int()``

    Returns:
        Integer version component or default
    """
    try:
        if not index < len(parts):
            return default
        return int(parts[index])
    except (ValueError, TypeError, IndexError):
        return default
572
+
573
+
574
def compare_versions(a: str, b: str) -> int:
    """Compare two dotted version strings component by component.

    Both strings are split on ``.`` and compared position by position via
    :func:`get_version_component`. A missing or non-numeric component
    counts as ``0``, so the shorter version is effectively zero-padded and
    ``1.0.1`` is greater than ``1.0``.

    Args:
        a: First version string (e.g. "1.2.3").
        b: Second version string (e.g. "1.2").

    Returns:
        -1 if a < b, 0 if a == b, 1 if a > b.
    """
    left = a.split(".")
    right = b.split(".")
    for position in range(max(len(left), len(right))):
        left_val = get_version_component(left, position)
        right_val = get_version_component(right, position)
        if left_val != right_val:
            # The first differing component decides the ordering.
            return 1 if left_val > right_val else -1
    return 0
598
+
599
+
600
def check_versions(local_version, latest_version, check_precedence=False):
    """Decide whether a version update should proceed.

    Thin wrapper around :func:`compare_versions`, kept for backward
    compatibility.

    Args:
        local_version: The local version string.
        latest_version: The latest published version (or None if not found).
        check_precedence: If True, proceed only when *local* is strictly
            newer. If False (default), proceed whenever the versions differ.

    Returns:
        bool: True if the update should proceed, False if it should be
        skipped. Always True when *latest_version* is falsy.
    """
    if not latest_version:
        return True

    ordering = compare_versions(local_version, latest_version)
    return ordering > 0 if check_precedence else ordering != 0
622
+
623
+
624
+ # =============================================================================
625
+ # Git info collection
626
+ # =============================================================================
627
+
628
+
629
+ def get_git_root(directory: Path) -> Path | None:
630
+ """Find the git repository root for a directory."""
631
+ try:
632
+ result = subprocess.run(
633
+ ["git", "rev-parse", "--show-toplevel"],
634
+ cwd=str(directory), capture_output=True, text=True, timeout=5,
635
+ )
636
+ if result.returncode == 0:
637
+ return Path(result.stdout.strip())
638
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
639
+ pass
640
+ return None
641
+
642
+
643
def get_git_info(directory: Path, logger: Logger | None = None) -> dict[str, str]:
    """Collect git metadata for annotations.

    All information is gathered with ``git`` subprocess calls (5-second
    timeout each), so gitpython is not required. Missing fields are then
    filled by ``_fill_from_ci_env``, and a present ``git_remote_url`` is
    passed through ``_normalize_git_url``.

    Keys that may be set here: ``_git_root``, ``git_relative_dir``,
    ``git_local_branch``, ``git_local_sha``, ``git_remote_branch``,
    ``git_remote_url``, ``git_remote_sha``. ``_git_root`` (the repository
    root) lets callers compute relative paths without a second subprocess
    call; its ``_`` prefix signals that it is not a component annotation
    and should not be persisted.

    Args:
        directory: Directory in which the git commands are run.
        logger: Optional logger for warnings and a summary of the result.

    Returns:
        Dict with whichever of the keys above could be determined.
    """
    info: dict[str, str] = {}

    def _git(*args: str) -> "subprocess.CompletedProcess[str]":
        """Run ``git <args>`` in *directory*, capturing text output."""
        return subprocess.run(
            ["git", *args],
            cwd=str(directory), capture_output=True, text=True, timeout=5,
        )

    try:
        # Find git root
        toplevel = _git("rev-parse", "--show-toplevel")
        if toplevel.returncode != 0:
            if logger:
                stderr = toplevel.stderr.strip() if toplevel.stderr else "unknown reason"
                logger.warn(f"⚠️ Not a git repository ({stderr}). "
                            "Will try CI environment variables.")
        else:
            git_root = Path(toplevel.stdout.strip())
            info["_git_root"] = str(git_root)

            # git_relative_dir: skipped when directory is not below git_root
            try:
                info["git_relative_dir"] = directory.resolve().relative_to(git_root).as_posix()
            except ValueError:
                pass

            # git_local_branch
            branch = _git("rev-parse", "--abbrev-ref", "HEAD")
            if branch.returncode == 0:
                info["git_local_branch"] = branch.stdout.strip()

            # git_local_sha
            head = _git("rev-parse", "HEAD")
            if head.returncode == 0:
                info["git_local_sha"] = head.stdout.strip()

            # Tracking branch info
            upstream = _git("rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}")
            if upstream.returncode == 0:
                tracking = upstream.stdout.strip()  # e.g., "origin/main"
                parts = tracking.split("/", 1)
                if len(parts) == 2:
                    remote_name, remote_branch = parts
                    info["git_remote_branch"] = remote_branch

                    # Remote URL
                    remote_url = _git("remote", "get-url", remote_name)
                    if remote_url.returncode == 0:
                        info["git_remote_url"] = remote_url.stdout.strip()

                    # Remote SHA
                    remote_sha = _git("rev-parse", tracking)
                    if remote_sha.returncode == 0:
                        info["git_remote_sha"] = remote_sha.stdout.strip()

            # Fallback: if no tracking branch, use local sha/branch and origin URL
            if "git_remote_url" not in info:
                origin = _git("remote", "get-url", "origin")
                if origin.returncode == 0:
                    info["git_remote_url"] = origin.stdout.strip()
            if "git_remote_sha" not in info and "git_local_sha" in info:
                info["git_remote_sha"] = info["git_local_sha"]
            if "git_remote_branch" not in info and "git_local_branch" in info:
                info["git_remote_branch"] = info["git_local_branch"]

    except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
        if logger:
            logger.warn(f"⚠️ Git not available ({type(e).__name__}: {e}). "
                        "Will try CI environment variables.")

    # Fallback: populate missing fields from CI environment variables
    _fill_from_ci_env(info)

    # Normalize SSH git URLs to HTTPS (e.g. git@github.com:Org/repo.git -> https://github.com/Org/repo.git)
    if "git_remote_url" in info:
        info["git_remote_url"] = _normalize_git_url(info["git_remote_url"])

    # Log resolved git metadata and warn about missing fields
    if logger:
        logger.info(" Git metadata resolved:")
        logger.info(f" _git_root: {info.get('_git_root', '(not set)')}")
        logger.info(f" git_remote_sha: {info.get('git_remote_sha', '(not set)')}")
        logger.info(f" git_remote_branch: {info.get('git_remote_branch', '(not set)')}")
        logger.info(f" git_remote_url: {info.get('git_remote_url', '(not set)')}")

        # Key to check -> label shown in the warning, in reporting order.
        expected = {
            "_git_root": "git_root (needed for component_yaml_path)",
            "git_remote_url": "git_remote_url",
            "git_remote_sha": "git_remote_sha",
            "git_remote_branch": "git_remote_branch",
        }
        missing = [label for key, label in expected.items() if key not in info]
        if missing:
            logger.warn(
                f"⚠️ Missing git metadata: {', '.join(missing)}. "
                "Published components will lack source links and transparency signals. "
                "Pass --git-remote-sha/--git-remote-branch/--git-remote-url or run from a git repo."
            )

    return info
770
+
771
+
772
def set_component_yaml_path(rel_path: str, annotations: dict[str, str], *, overwrite: bool = True) -> None:
    """Record a repo-relative YAML path as ``git_relative_dir`` + ``component_yaml_path``.

    The path is split at its last ``/``: ``"a/b/comp.yaml"`` yields
    ``git_relative_dir="a/b"`` and ``component_yaml_path="comp.yaml"``. A
    bare filename such as ``"comp.yaml"`` sets only ``component_yaml_path``.

    Args:
        rel_path: Repo-relative path using ``/`` separators.
        annotations: Annotation dict, updated in place.
        overwrite: If False, preserve existing values (setdefault semantics).
    """
    directory, separator, filename = rel_path.rpartition("/")
    if separator:
        updates = {"git_relative_dir": directory, "component_yaml_path": filename}
    else:
        updates = {"component_yaml_path": rel_path}

    for key, value in updates.items():
        if overwrite:
            annotations[key] = value
        else:
            annotations.setdefault(key, value)
795
+
796
+
797
def normalize_annotation_paths(
    yaml_path: "str | Path",
    git_root: "str | Path",
    annotations: dict[str, str],
) -> None:
    """Normalize ``dockerfile_path`` and ``documentation_path`` to be relative to ``git_relative_dir``.

    Component authors may write path annotations relative to the YAML file's
    directory (e.g. ``../../../../dockerfiles/foo.Dockerfile``) or relative to
    ``git_relative_dir`` (e.g. ``dockerfiles/foo.Dockerfile``). This function
    resolves each path using filesystem checks and re-expresses it relative to
    the final ``git_relative_dir``.

    Resolution order for each path annotation:

    1. Relative to ``git_relative_dir`` — if the file exists, leave the value
       as-is (already correct).
    2. Relative to the YAML file's parent directory — if the file exists,
       re-express it relative to ``git_relative_dir``.
    3. If neither resolves to an existing file, leave the value unchanged.

    Rewritten values always use ``/`` as the separator, regardless of the host
    OS, so they stay consistent with the ``/``-separated ``git_relative_dir``
    annotation. If no relative path can be computed (on Windows, when the two
    locations are on different drives) the value is left unchanged.

    This is a no-op when ``git_relative_dir`` is missing/empty or equals the
    YAML file's parent directory (the common case).

    Args:
        yaml_path: Filesystem path to the component YAML file.
        git_root: Filesystem path to the git repository root.
        annotations: The ``metadata.annotations`` dict (modified in place).
    """
    import os
    from pathlib import Path as _Path

    git_relative_dir = annotations.get("git_relative_dir")
    if not git_relative_dir:
        return

    yaml_parent = _Path(yaml_path).resolve().parent
    git_rel_dir_abs = (_Path(git_root) / git_relative_dir).resolve()

    # If git_relative_dir resolves to the YAML parent, paths are equivalent — skip
    if git_rel_dir_abs == yaml_parent:
        return

    for key in ("dockerfile_path", "documentation_path"):
        value = annotations.get(key)
        if not value:
            continue

        # 1. Already relative to git_relative_dir?
        if (git_rel_dir_abs / value).resolve().exists():
            continue  # already correct

        # 2. Relative to YAML parent dir?
        resolved_from_yaml = (yaml_parent / value).resolve()
        if not resolved_from_yaml.exists():
            continue  # 3. Unresolvable — leave the author's value alone.

        # Re-express relative to git_relative_dir. Use os.path.relpath rather
        # than Path.relative_to so that files *above* git_relative_dir produce
        # ``../`` prefixed paths.
        try:
            normalized = os.path.relpath(str(resolved_from_yaml), str(git_rel_dir_abs))
        except ValueError:
            # Windows: path and start are on different drives, so no relative
            # path exists. Keep the original value instead of crashing.
            continue

        # Annotations are repo paths, not OS paths: force forward slashes
        # (a no-op on POSIX, where os.sep is already "/").
        annotations[key] = normalized.replace(os.sep, "/")
861
+
862
+
863
# CI environment variables probed for git metadata. Each tuple is scanned in
# order and the first variable holding a non-empty value wins. Covers
# Buildkite, GitHub Actions, and GitLab CI out of the box. Wrapper packages
# can prepend additional CI-system-specific variables by monkey-patching these
# module attributes at import time; ``_fill_from_ci_env`` looks the names up
# on every call, so rebinding them takes effect immediately.

# Candidates for the repository checkout directory (fills ``_git_root``).
_CI_GIT_ROOT_VARS: tuple[str, ...] = ("BUILDKITE_BUILD_CHECKOUT_PATH", "GITHUB_WORKSPACE", "CI_PROJECT_DIR")
# Candidates for the commit SHA (fills ``git_remote_sha``).
_CI_SHA_VARS: tuple[str, ...] = ("BUILDKITE_COMMIT", "GITHUB_SHA", "CI_COMMIT_SHA")
# Candidates for the branch name (fills ``git_remote_branch``).
_CI_BRANCH_VARS: tuple[str, ...] = ("BUILDKITE_BRANCH", "GITHUB_REF_NAME", "CI_COMMIT_BRANCH")
# Candidates for the repository URL (fills ``git_remote_url``).
# ``GITHUB_SERVER_URL`` is special-cased by ``_fill_from_ci_env``: it is only
# used when ``GITHUB_REPOSITORY`` is also set, and the two are joined with "/".
# NOTE(review): GitLab documents ``CI_REPOSITORY_URL`` as embedding a CI job
# token in the URL — confirm consumers strip credentials before publishing.
_CI_REPO_URL_VARS: tuple[str, ...] = ("BUILDKITE_REPO", "GITHUB_SERVER_URL", "CI_REPOSITORY_URL")
871
+
872
+
873
+ def _normalize_git_url(url: str) -> str:
874
+ """Normalize a git remote URL to a browsable HTTPS URL.
875
+
876
+ Handles common formats:
877
+ - ``git@github.com:Org/repo.git`` -> ``https://github.com/Org/repo``
878
+ - ``ssh://git@github.com/Org/repo.git`` -> ``https://github.com/Org/repo``
879
+ - ``https://github.com/Org/repo.git`` -> ``https://github.com/Org/repo``
880
+ - ``https://github.com/Org/repo`` -> unchanged
881
+
882
+ The ``.git`` suffix is stripped so the result can be used directly to
883
+ build ``/blob/{ref}/{path}`` links without an extra ``.removesuffix``.
884
+ """
885
+ import re
886
+
887
+ # SCP-style: git@host:path
888
+ m = re.match(r"^git@([^:]+):(.+)$", url)
889
+ if m:
890
+ url = f"https://{m.group(1)}/{m.group(2)}"
891
+ else:
892
+ # ssh://git@host/path
893
+ m = re.match(r"^ssh://(?:[^@]+@)?([^/]+)/(.+)$", url)
894
+ if m:
895
+ url = f"https://{m.group(1)}/{m.group(2)}"
896
+
897
+ return url.removesuffix(".git")
898
+
899
+
900
def _fill_from_ci_env(info: dict[str, str]) -> None:
    """Populate absent git fields in *info* from CI environment variables.

    Only keys missing from *info* are filled; existing values are never
    replaced. For each key the matching module-level tuple
    (``_CI_GIT_ROOT_VARS``, ``_CI_SHA_VARS``, ``_CI_BRANCH_VARS``,
    ``_CI_REPO_URL_VARS``) is scanned in order and the first variable with a
    non-empty value is used. The tuples are read at call time so they can be
    extended to support new CI systems.

    Args:
        info: Git metadata mapping, modified in place.
    """
    import os

    environ = os.environ

    # Fields whose value is taken verbatim from the first non-empty variable.
    direct_fields = (
        ("_git_root", _CI_GIT_ROOT_VARS),
        ("git_remote_sha", _CI_SHA_VARS),
        ("git_remote_branch", _CI_BRANCH_VARS),
    )
    for field, candidates in direct_fields:
        if field in info:
            continue
        for name in candidates:
            found = environ.get(name)
            if found:
                info[field] = found
                break

    if "git_remote_url" in info:
        return

    for name in _CI_REPO_URL_VARS:
        base = environ.get(name)
        if not base:
            continue
        if name == "GITHUB_SERVER_URL":
            # GITHUB_SERVER_URL is only the host; it needs GITHUB_REPOSITORY
            # appended. Without it, fall through to the next candidate.
            repo = environ.get("GITHUB_REPOSITORY", "")
            if not repo:
                continue
            base = f"{base}/{repo}"
        info["git_remote_url"] = base
        break