tangle-cli 0.0.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. tangle_cli/__init__.py +19 -0
  2. tangle_cli/api_cli.py +787 -0
  3. tangle_cli/api_schema.py +633 -0
  4. tangle_cli/api_transport.py +461 -0
  5. tangle_cli/args_container.py +244 -0
  6. tangle_cli/artifacts.py +293 -0
  7. tangle_cli/artifacts_cli.py +108 -0
  8. tangle_cli/cli.py +57 -0
  9. tangle_cli/cli_helpers.py +116 -0
  10. tangle_cli/cli_options.py +52 -0
  11. tangle_cli/client.py +677 -0
  12. tangle_cli/component_from_func.py +1856 -0
  13. tangle_cli/component_generator.py +298 -0
  14. tangle_cli/component_inspector.py +494 -0
  15. tangle_cli/component_publisher.py +921 -0
  16. tangle_cli/components_cli.py +269 -0
  17. tangle_cli/dynamic_discovery_client.py +296 -0
  18. tangle_cli/generated_model_extensions.py +405 -0
  19. tangle_cli/generated_runtime.py +43 -0
  20. tangle_cli/handler.py +96 -0
  21. tangle_cli/hydration_trust.py +222 -0
  22. tangle_cli/logger.py +166 -0
  23. tangle_cli/models.py +407 -0
  24. tangle_cli/module_bundler.py +662 -0
  25. tangle_cli/openapi/__init__.py +0 -0
  26. tangle_cli/openapi/codegen.py +1090 -0
  27. tangle_cli/openapi/parser.py +77 -0
  28. tangle_cli/pipeline_dehydrator.py +720 -0
  29. tangle_cli/pipeline_hydrator.py +1785 -0
  30. tangle_cli/pipeline_run_annotations.py +41 -0
  31. tangle_cli/pipeline_run_details.py +203 -0
  32. tangle_cli/pipeline_run_manager.py +1994 -0
  33. tangle_cli/pipeline_run_search.py +712 -0
  34. tangle_cli/pipeline_runner.py +620 -0
  35. tangle_cli/pipeline_runs_cli.py +584 -0
  36. tangle_cli/pipelines.py +581 -0
  37. tangle_cli/pipelines_cli.py +271 -0
  38. tangle_cli/published_components_cli.py +373 -0
  39. tangle_cli/py.typed +0 -0
  40. tangle_cli/quickstart.py +110 -0
  41. tangle_cli/secrets.py +156 -0
  42. tangle_cli/secrets_cli.py +269 -0
  43. tangle_cli/utils.py +942 -0
  44. tangle_cli/version_manager.py +470 -0
  45. tangle_cli-0.0.1a1.dist-info/METADATA +561 -0
  46. tangle_cli-0.0.1a1.dist-info/RECORD +48 -0
  47. tangle_cli-0.0.1a1.dist-info/WHEEL +4 -0
  48. tangle_cli-0.0.1a1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,720 @@
1
+ """Pipeline dehydration helpers for hydrated Tangle pipeline specs.
2
+
3
+ The dehydrator is the inverse companion to :mod:`tangle_cli.pipeline_hydrator`:
4
+ it replaces full ``componentRef.spec`` blocks with portable digest/name/url/file
5
+ references, and can export a hydrated pipeline into a Jinja2 template + config
6
+ pair. The code is intentionally native-free; downstream packages can provide a
7
+ client for component-library existence checks and URI reader/writer hooks for
8
+ schemes such as ``gs://`` without this module importing those SDKs.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import copy
14
+ import json
15
+ import os
16
+ import re
17
+ import textwrap
18
+ from collections.abc import Mapping
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ import yaml
24
+
25
+ from . import utils
26
+ from .api_transport import DEFAULT_API_URL
27
+ from .handler import TangleCliHandler
28
+ from .logger import Logger, get_default_logger
29
+ from .pipeline_hydrator import PipelineHydrator, ResolverContext, UriReader, UriWriter
30
+
31
# Separator used when joining spec names and task names into a subgraph path.
# A pipe is used instead of a dot because task names can contain dots.
PATH_SEPARATOR = "|"
32
+
33
+
34
@dataclass(frozen=True)
class Jinja2ExportResult:
    """Immutable summary of a pipeline-to-Jinja2 export.

    Field order is part of the interface: instances may be built positionally.
    """

    # Path of the main ``.j2`` template that was written.
    main_template_path: Path
    # Path of the config file that points at the main template.
    config_file_path: Path
    # Number of distinct subtemplates produced by the export.
    subtemplates_count: int
    # Number of top-level input defaults moved into the config file.
    top_level_params_count: int
    # Paths of the subtemplate files that were written.
    subtemplate_paths: list[Path]
43
+
44
+
45
class DehydrateChoice:
    """Single-letter codes naming how a componentRef should be dehydrated.

    The lowercase codes defined here apply to the component currently being
    processed. Interactive callers may type the uppercase form of a code to
    have the same choice remembered for every component with that digest.
    """

    DIGEST = "d"  # replace the spec with a digest reference
    NAME = "n"  # replace the spec with a name reference
    URL = "u"  # replace the spec with its canonical URL
    FILE = "f"  # extract the spec to a file and reference that file
    KEEP = "k"  # leave the full spec in place
    AUTO = "a"  # pick url / digest / file automatically
58
+
59
+
60
class PipelineDehydrator(TangleCliHandler):
    """Dehydrate pipeline YAML by replacing full component specs with refs.

    Supported choices:
    - ``DIGEST``: replace with ``componentRef.digest``
    - ``NAME``: replace with ``componentRef.name``
    - ``URL``: replace with ``componentRef.url`` when a canonical URL exists
    - ``FILE``: extract the component spec and reference it by URL
    - ``KEEP``: preserve the full spec
    - ``AUTO``: URL if canonical, else digest when the optional client can find
      the component in the library, else file extraction

    URI I/O is delegated through the same native-free hooks as the hydrator.
    OSS registers no cloud schemes by default; downstream packages can pass or
    register URI hooks for ``gs://`` or other backends.
    """

    def __init__(
        self,
        remembered_choices: Mapping[str, str] | None = None,
        components_dir: Path | str | None = None,
        output_file: Path | str | None = None,
        client: Any = None,
        interactive: bool = False,
        logger: Logger | None = None,
        component_extension: str | None = None,
        *,
        base_url: str | None = None,
        uri_readers: Mapping[str, UriReader] | None = None,
        uri_writers: Mapping[str, UriWriter] | None = None,
    ) -> None:
        """Configure the dehydrator.

        Args:
            remembered_choices: Mapping of digest -> choice code. The empty
                string key acts as the default choice for every component.
            components_dir: Where extracted component files go. When omitted it
                is derived from ``output_file`` (sibling ``components`` folder),
                falling back to a relative ``components`` directory.
            output_file: Default destination of the dehydrated pipeline.
            client: Optional API client forwarded to the base handler.
            interactive: Prompt for a choice when none is remembered.
            logger: Optional logger forwarded to the base handler.
            component_extension: Extension for extracted files (``.yaml``
                when omitted or empty).
            base_url: API base URL; ``DEFAULT_API_URL`` when omitted or empty.
            uri_readers: Extra URI reader hooks handed to the hydrator.
            uri_writers: Extra URI writer hooks handed to the hydrator.
        """

        super().__init__(
            client=client,
            logger=logger,
            base_url=base_url or DEFAULT_API_URL,
        )
        # Copy so interactive "remember" answers never mutate the caller's mapping.
        self.remembered_choices = dict(remembered_choices or {})
        self.output_file = output_file
        self.component_extension = component_extension or ".yaml"

        # Remember whether the caller pinned the directory; if not, it follows
        # whichever output file is active.
        self._components_dir_explicit = components_dir is not None
        if components_dir is not None:
            self.components_dir: Path | str = components_dir
        elif output_file is not None:
            self.components_dir = self._join_destination(self._destination_parent(output_file), "components")
        else:
            self.components_dir = Path("components")

        self.interactive = interactive
        # digest -> destination of the already-extracted component file.
        self._saved_components: dict[str, Path | str] = {}
        # File that relative ``file://`` references are computed against.
        self._current_reference_file: Path | str | None = output_file
        # The hydrator is used purely for its URI read/write plumbing.
        self._io = PipelineHydrator(
            enable_resolution=False,
            logger=self.log,
            base_url=self.base_url,
            uri_readers=uri_readers,
            uri_writers=uri_writers,
        )

    def _is_auto_mode(self) -> bool:
        """Return True when any remembered choice asks for auto mode."""

        return DehydrateChoice.AUTO in self.remembered_choices.values()

    @staticmethod
    def _uri_scheme(value: Path | str | None) -> str | None:
        """Return the URI scheme of *value* as reported by the hydrator, or None for None."""

        if value is None:
            return None
        return PipelineHydrator._uri_scheme(str(value))

    @classmethod
    def _is_local_destination(cls, value: Path | str | None) -> bool:
        """Return True when *value* has no scheme or uses the ``file`` scheme."""

        scheme = cls._uri_scheme(value)
        return scheme is None or scheme == "file"

    @classmethod
    def _destination_parent(cls, value: Path | str) -> Path | str:
        """Return the parent of *value*: a string for remote URIs, a Path for local ones."""

        value_str = str(value)
        scheme = cls._uri_scheme(value)
        if scheme and scheme != "file":
            # Remote URIs are handled textually: drop the last path segment.
            return value_str.rsplit("/", 1)[0] if "/" in value_str else value_str
        path = Path(value_str[7:] if value_str.startswith("file://") else value_str)
        return path.parent

    @classmethod
    def _join_destination(cls, parent: Path | str, filename: str) -> Path | str:
        """Join *filename* onto *parent*, textually for remote URIs and via Path otherwise."""

        if cls._uri_scheme(parent) and cls._uri_scheme(parent) != "file":
            return f"{str(parent).rstrip('/')}/{filename}"
        return Path(parent) / filename

    def _resolver_context(self, uri: str, kind: str) -> ResolverContext:
        """Build the hydrator resolver context for *uri*, using *kind* when it has no scheme."""

        return self._io.make_resolver_context(self._uri_scheme(uri) or kind, uri, kind, None)

    def _read_text(self, source: Path | str, *, kind: str = "pipeline") -> str:
        """Read *source* through the hydrator's URI hooks; a falsy result becomes ``""``."""

        return self._io._read_uri_text(str(source), kind, self._resolver_context(str(source), kind)) or ""

    def _write_text(self, destination: Path | str, content: str, *, kind: str = "output") -> None:
        """Write *content* to *destination* through the hydrator's URI hooks."""

        self._io._write_uri_text(str(destination), content, self._resolver_context(str(destination), kind))

    def load_file(self, input_file: Path | str) -> dict[str, Any]:
        """Read a local or URI pipeline YAML file through the registered hooks.

        Returns an empty dict when the document is empty.
        """

        data = yaml.safe_load(self._read_text(input_file, kind="pipeline"))
        return data or {}

    def write_file(self, data: dict[str, Any], output_file: Path | str | None = None) -> None:
        """Write pipeline YAML to a local path or URI through registered hooks.

        Raises:
            ValueError: If neither *output_file* nor ``self.output_file`` is set.
        """

        destination = output_file or self.output_file
        if destination is None:
            raise ValueError("output_file is required")
        self._write_text(destination, utils.dump_yaml(data), kind="output")

    def dehydrate_file(
        self,
        input_file: Path | str,
        output_file: Path | str | None = None,
    ) -> dict[str, Any]:
        """Read, dehydrate, and write a pipeline YAML file.

        Both input and output support local paths and any URI schemes provided
        by registered/passed hydrator URI hooks. The instance's output-related
        state is overridden for the duration of the call and restored afterwards,
        even on failure.

        Returns:
            The dehydrated pipeline dictionary that was written.
        """

        previous_output = self.output_file
        previous_reference = self._current_reference_file
        previous_components_dir = self.components_dir
        if output_file is not None:
            self.output_file = output_file
            self._current_reference_file = output_file
            if not self._components_dir_explicit:
                self.components_dir = self._join_destination(self._destination_parent(output_file), "components")
        try:
            data = self.load_file(input_file)
            output = self.dehydrate(data)
            self.write_file(output, output_file)
            return output
        finally:
            self.output_file = previous_output
            self._current_reference_file = previous_reference
            self.components_dir = previous_components_dir

    def _auto_dehydrate_choice(
        self,
        canonical_url: str | None,
        resolved_digest: str,
        name: str,
        _spec: dict[str, Any],
        path: str,
    ) -> str:
        """Determine Auto mode outcome: ``url``, ``digest``, or ``file``.

        Order of preference: a canonical URL wins; otherwise the digest is used
        when a client is available and ``get_component_spec`` succeeds for it;
        anything else falls back to file extraction.
        """

        self.log.info(f" Auto: '{name}' at {path} (digest: {resolved_digest[:16]}...)")
        if canonical_url:
            self.log.info(" Auto: has canonical URL -> url ref")
            return "url"
        if not resolved_digest or resolved_digest == "unknown":
            self.log.info(" Auto: no digest -> file")
            return "file"
        try:
            client = self._get_client()
        except (Exception, SystemExit):
            # Best effort: a missing/misconfigured client must not abort dehydration.
            self.log.info(" Auto: no API client available -> file")
            return "file"
        if client is None:
            self.log.info(" Auto: no API client provided -> file")
            return "file"
        try:
            client.get_component_spec(resolved_digest)
            self.log.info(f" Auto: digest {resolved_digest[:16]} found in library -> digest ref")
            return "digest"
        except Exception:
            # Any lookup failure is treated as "not in the library".
            self.log.info(f" Auto: digest {resolved_digest[:16]} not in library -> file")
            return "file"

    def _prompt_choice(self, name: str, digest: str, canonical_url: str | None, path: str) -> str:
        """Log the available options and read the user's choice from stdin.

        URL options are only listed when *canonical_url* is set. An empty
        answer yields ``DehydrateChoice.AUTO``.
        """

        self.log.info(f"\n📦 Found componentRef at: {path}")
        self.log.info(f" Name: {name}")
        self.log.info(f" Digest: {digest[:16]}...")
        if canonical_url:
            self.log.info(f" URL: {canonical_url}")
        self.log.info(" Options:")
        self.log.info(f" [{DehydrateChoice.DIGEST}] Replace with componentRef.digest")
        self.log.info(f" [{DehydrateChoice.NAME}] Replace with componentRef.name")
        if canonical_url:
            self.log.info(f" [{DehydrateChoice.URL}] Replace with componentRef.url")
        self.log.info(f" [{DehydrateChoice.FILE}] Extract to file and use file:// URL")
        self.log.info(f" [{DehydrateChoice.AUTO}] Auto: URL if present, else digest if in library, else file")
        self.log.info(f" [{DehydrateChoice.KEEP}] Leave as is (keep full spec)")
        self.log.info(f" [{DehydrateChoice.DIGEST.upper()}] Always replace this component with digest")
        self.log.info(f" [{DehydrateChoice.NAME.upper()}] Always replace this component with name")
        if canonical_url:
            self.log.info(f" [{DehydrateChoice.URL.upper()}] Always replace this component with URL")
        self.log.info(f" [{DehydrateChoice.FILE.upper()}] Always extract to file")
        choice = input(f" Choice [{DehydrateChoice.AUTO}]: ").strip() or DehydrateChoice.AUTO
        return choice

    def _process_task(
        self,
        task_name: str,
        task_data: dict[str, Any],
        path: str,
        base_dir: Path | None = None,
        _recursive_params: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Dehydrate a single non-subgraph task's componentRef.

        Tasks without a dict ``componentRef`` carrying a dict ``spec`` are
        returned unchanged. Otherwise a new task dict is returned in which
        ``componentRef`` is replaced according to the remembered, prompted, or
        automatic choice.
        """

        # These parameters exist only to satisfy the traversal callback signature.
        del task_name, base_dir, _recursive_params
        if not isinstance(task_data, dict) or "componentRef" not in task_data:
            return task_data

        component_ref = task_data["componentRef"]
        if not isinstance(component_ref, dict) or "spec" not in component_ref:
            return task_data

        name, digest = utils.get_component_ref_info(component_ref)
        spec = component_ref.get("spec", {})
        if not isinstance(spec, dict):
            return task_data

        # YAML may hold explicit nulls for ``metadata``/``annotations``; treat
        # anything that is not a mapping as "no canonical location".
        metadata = spec.get("metadata")
        annotations = metadata.get("annotations") if isinstance(metadata, dict) else None
        canonical_url = annotations.get("canonical_location") if isinstance(annotations, dict) else None

        resolved_digest = component_ref.get("digest") or utils.compute_spec_digest(spec)
        # Most specific remembered choice first; the "" key is the global default.
        choice = (
            self.remembered_choices.get(resolved_digest)
            or self.remembered_choices.get(digest)
            or self.remembered_choices.get("")
        )
        if choice:
            if choice == DehydrateChoice.URL and not canonical_url:
                # A remembered URL choice cannot be honoured without a URL.
                choice = DehydrateChoice.DIGEST
            if choice != DehydrateChoice.AUTO:
                self.log.info(f" Using remembered choice: {choice}")
        elif self.interactive:
            choice = self._prompt_choice(name, digest, canonical_url, path)
            # Uppercase answers are remembered for this digest, then applied
            # as their lowercase equivalent.
            if choice == DehydrateChoice.DIGEST.upper():
                self.remembered_choices[resolved_digest] = DehydrateChoice.DIGEST
                choice = DehydrateChoice.DIGEST
            elif choice == DehydrateChoice.NAME.upper():
                self.remembered_choices[resolved_digest] = DehydrateChoice.NAME
                choice = DehydrateChoice.NAME
            elif choice == DehydrateChoice.URL.upper() and canonical_url:
                self.remembered_choices[resolved_digest] = DehydrateChoice.URL
                choice = DehydrateChoice.URL
            elif choice == DehydrateChoice.FILE.upper():
                self.remembered_choices[resolved_digest] = DehydrateChoice.FILE
                choice = DehydrateChoice.FILE
        else:
            choice = DehydrateChoice.AUTO

        new_task = {k: v for k, v in task_data.items() if k != "componentRef"}

        if choice == DehydrateChoice.AUTO:
            effective = self._auto_dehydrate_choice(canonical_url, resolved_digest, name, spec, path)
            if effective == "url":
                new_task["componentRef"] = {"url": canonical_url}
                self.log.info(" → Auto: Replaced with componentRef.url")
            elif effective == "digest":
                new_task["componentRef"] = {"digest": resolved_digest}
                self.log.info(" → Auto: Replaced with componentRef.digest (found in library)")
            else:
                file_url = self._save_component_to_file(name, resolved_digest, spec)
                new_task["componentRef"] = {"url": file_url}
                self.log.info(" → Auto: Extracted to file (no URL, not in library or no client)")
        elif choice == DehydrateChoice.DIGEST:
            new_task["componentRef"] = {"digest": resolved_digest}
            self.log.info(" → Replaced with componentRef.digest")
        elif choice == DehydrateChoice.NAME:
            new_task["componentRef"] = {"name": name}
            self.log.info(" → Replaced with componentRef.name")
        elif choice == DehydrateChoice.URL and canonical_url:
            new_task["componentRef"] = {"url": canonical_url}
            self.log.info(" → Replaced with componentRef.url")
        elif choice == DehydrateChoice.FILE:
            file_url = self._save_component_to_file(name, resolved_digest, spec)
            new_task["componentRef"] = {"url": file_url}
            self.log.info(f" → Extracted to {file_url}")
        else:
            # KEEP, an unrecognised answer, or URL without a canonical URL.
            new_task["componentRef"] = component_ref
            self.log.info(" → Kept as componentRef (full spec)")

        return new_task

    def _safe_filename(self, name: str, fallback: str = "component") -> str:
        """Lowercase *name*, map spaces/hyphens to ``_``, and drop other non-alphanumerics.

        Returns *fallback* when nothing usable remains.
        """

        safe_name = name.lower().replace(" ", "_").replace("-", "_")
        safe_name = "".join(c for c in safe_name if c.isalnum() or c == "_")
        return safe_name or fallback

    def _save_component_to_file(self, name: str, digest: str, spec: dict[str, Any]) -> str:
        """Save a component spec once and return a reference URL for this file.

        Files are cached per digest. The filename is derived from *name*; when
        a different digest already claimed that filename, a digest-based suffix
        is appended so distinct components never overwrite each other.
        """

        if digest not in self._saved_components:
            filename = f"{self._safe_filename(name)}{self.component_extension}"
            # Two different specs can share a display name. Without this check
            # the second write would clobber the first file and both refs would
            # silently point at the wrong component.
            taken = {Path(str(saved)).name for saved in self._saved_components.values()}
            if filename in taken:
                filename = f"{self._safe_filename(f'{name}_{digest[:12]}')}{self.component_extension}"
            destination = self._join_destination(self.components_dir, filename)
            self._write_text(destination, utils.dump_yaml(spec), kind="component")
            if self._is_local_destination(destination):
                # Store local files as resolved paths so relative URLs can be
                # recomputed against whichever file references them later.
                destination_text = str(destination)
                if destination_text.startswith("file://"):
                    destination_text = destination_text[7:]
                destination = Path(destination_text).resolve()
            self._saved_components[digest] = destination
        return self._make_ref_url(self._saved_components[digest])

    def _make_ref_url(self, target: Path | str) -> str:
        """Create a componentRef URL for a saved target.

        Remote targets are returned verbatim; local ones become ``file://`` URLs.
        """

        if not self._is_local_destination(target):
            return str(target)
        return self._make_file_url(Path(str(target)[7:] if str(target).startswith("file://") else str(target)))

    def _make_file_url(self, target_path: Path) -> str:
        """Create a file:// URL relative to the current reference file.

        Falls back to an absolute ``file://`` URL when there is no local
        reference file to be relative to.
        """

        ref_file = self._current_reference_file or self.output_file
        if ref_file and self._is_local_destination(ref_file):
            ref_str = str(ref_file)
            ref_path = Path(ref_str[7:] if ref_str.startswith("file://") else ref_str)
            ref_dir = ref_path.parent.resolve()
            rel = os.path.relpath(target_path.resolve(), ref_dir)
            return f"file://./{rel}"
        return f"file://{target_path.resolve()}"

    @staticmethod
    def _relativize_file_urls(spec: dict[str, Any], reference_dir: Path) -> None:
        """Convert absolute file:// URLs in a spec's tasks relative to reference_dir.

        Only direct tasks of *spec* are touched, and only ``componentRef.url``
        values starting with ``file:///``. The spec is modified in place.
        """

        tasks = spec.get("implementation", {}).get("graph", {}).get("tasks", {})
        resolved_ref_dir = reference_dir.resolve()
        for task_data in tasks.values():
            if not isinstance(task_data, dict):
                continue
            component_ref = task_data.get("componentRef")
            if not isinstance(component_ref, dict) or "url" not in component_ref:
                continue
            url = component_ref["url"]
            if not isinstance(url, str) or not url.startswith("file:///"):
                continue
            # Strip "file://" and keep the leading "/" of the absolute path.
            abs_path = Path(url[7:])
            rel = os.path.relpath(abs_path, resolved_ref_dir)
            component_ref["url"] = f"file://./{rel}"

    def _subgraph_destination(self, filename: str) -> Path | str:
        """Return where an extracted subgraph file named *filename* should go.

        Uses a ``subgraphs`` folder next to the output file, or inside the
        components directory when no output file is set.
        """

        if self.output_file is not None:
            subgraph_dir = self._join_destination(self._destination_parent(self.output_file), "subgraphs")
        else:
            subgraph_dir = self._join_destination(self.components_dir, "subgraphs")
        return self._join_destination(subgraph_dir, filename)

    def _extract_subgraphs_to_files(self, data: dict[str, Any]) -> dict[str, Any]:
        """Extract subgraph specs to YAML files and replace them with URL refs.

        Subgraphs are handled deepest-first so that a parent is written after
        its children have already been replaced by references. *data* is
        modified in place and also returned.
        """

        queue = _build_subgraph_processing_queue(data)
        subgraph_counter = 0

        for depth, path in queue:
            if depth == 0:
                # Depth 0 is the pipeline itself, not a subgraph.
                continue

            result = _get_subgraph_by_path(data, path)
            if not result:
                continue
            component_ref, spec = result

            spec_name = spec.get("name", "subgraph")
            filename = f"{self._safe_filename(str(spec_name), 'subgraph')}_{subgraph_counter}{self.component_extension}"
            subgraph_counter += 1
            destination = self._subgraph_destination(filename)

            # While dehydrating the subgraph's own tasks, relative URLs must be
            # computed against the subgraph file rather than the main output.
            original_ref = self._current_reference_file
            self._current_reference_file = destination
            try:
                spec_to_write = utils.traverse_pipeline_tasks(copy.deepcopy(spec), str(spec_name), self._process_task)
            finally:
                self._current_reference_file = original_ref

            if self._is_local_destination(destination):
                destination_text = str(destination)
                if destination_text.startswith("file://"):
                    destination_text = destination_text[7:]
                destination_path = Path(destination_text)
                self._relativize_file_urls(spec_to_write, destination_path.parent)
                self._write_text(destination_path, utils.dump_yaml(spec_to_write) + "\n", kind="subgraph")
                # Absolute for now; relativized once the referencing file is known.
                component_url = f"file://{destination_path.resolve()}"
            else:
                self._write_text(destination, utils.dump_yaml(spec_to_write) + "\n", kind="subgraph")
                component_url = str(destination)

            self.log.info(f" 📦 Extracted subgraph '{spec_name}' -> {filename}")
            component_ref.clear()
            component_ref["url"] = component_url

        if self.output_file and self._is_local_destination(self.output_file):
            output_file_text = str(self.output_file)
            if output_file_text.startswith("file://"):
                output_file_text = output_file_text[7:]
            output_path = Path(output_file_text)
            self._relativize_file_urls(data, output_path.parent)

        return data

    def dehydrate(self, data: dict[str, Any]) -> dict[str, Any]:
        """Return a dehydrated copy of *data* according to configured choices.

        Subgraphs are extracted to files only when the default remembered
        choice (the ``""`` key) is ``AUTO``. The input is never mutated.
        """

        working = copy.deepcopy(data)
        if self.remembered_choices.get("") == DehydrateChoice.AUTO:
            self._extract_subgraphs_to_files(working)

        pipeline_name = working.get("name", "pipeline")
        return utils.traverse_pipeline_tasks(working, str(pipeline_name), self._process_task)

    def export_to_jinja2(
        self,
        data: dict[str, Any],
        output_file: Path,
        jinja2_path: Path,
    ) -> Jinja2ExportResult:
        """Dehydrate a pipeline and export it to Jinja2 template files.

        Writes the main template to *jinja2_path*, one ``.yaml.j2`` file per
        distinct subgraph next to it, and a config file at *output_file* that
        holds ``template_file`` plus the top-level input defaults.

        Returns:
            A :class:`Jinja2ExportResult` describing what was written.
        """

        previous_output = self.output_file
        previous_reference = self._current_reference_file
        previous_components_dir = self.components_dir
        self.output_file = output_file
        self._current_reference_file = output_file
        if not self._components_dir_explicit:
            self.components_dir = self._join_destination(self._destination_parent(output_file), "components")
        try:
            output_yaml = self.dehydrate(data)
        finally:
            self.output_file = previous_output
            self._current_reference_file = previous_reference
            self.components_dir = previous_components_dir

        jinja2_path.parent.mkdir(parents=True, exist_ok=True)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # "pipeline.yaml.j2" -> stem "pipeline.yaml" -> base name "pipeline".
        base_name = jinja2_path.stem
        if base_name.endswith(".yaml"):
            base_name = base_name[:-5]

        top_level_defaults = _extract_input_defaults(output_yaml)
        modified_data, subtemplates = _process_subgraphs_to_subtemplates(output_yaml, self.log)
        template_data = _replace_input_defaults_with_placeholders(modified_data)

        subtemplate_paths: list[Path] = []
        for subtemplate_id, subtemplate_info in subtemplates.items():
            subtemplate_file = jinja2_path.parent / f"{base_name}_{subtemplate_id}.yaml.j2"
            subtemplate_yaml = utils.dump_yaml(subtemplate_info["spec"])

            # Each nesting level adds two separators (task, spec) to the path.
            path_depth = subtemplate_info["path"].count(PATH_SEPARATOR) // 2
            indent = " " * (12 * path_depth)
            subtemplate_yaml = textwrap.indent(subtemplate_yaml, indent)
            subtemplate_yaml = _convert_templateid_to_includes(subtemplate_yaml, subtemplates, base_name)

            subtemplate_file.write_text(subtemplate_yaml, encoding="utf-8")
            subtemplate_paths.append(subtemplate_file)
            self.log.info(f" 📄 Wrote {subtemplate_file.name}")

        main_yaml = utils.dump_yaml(template_data)
        main_yaml = _convert_templateid_to_includes(main_yaml, subtemplates, base_name)
        jinja2_path.write_text(main_yaml, encoding="utf-8")

        try:
            rel_template_path = jinja2_path.relative_to(output_file.parent)
        except ValueError:
            # Template lives outside the config's directory; keep the path as given.
            rel_template_path = jinja2_path

        config_data: dict[str, Any] = {"template_file": str(rel_template_path), **top_level_defaults}
        output_file.write_text(utils.dump_yaml(config_data), encoding="utf-8")

        return Jinja2ExportResult(
            main_template_path=jinja2_path,
            config_file_path=output_file,
            subtemplates_count=len(subtemplates),
            top_level_params_count=len(top_level_defaults),
            subtemplate_paths=subtemplate_paths,
        )
535
+
536
+
537
+ def _extract_input_defaults(data: dict[str, Any]) -> dict[str, Any]:
538
+ """Extract default values from top-level inputs."""
539
+
540
+ defaults: dict[str, Any] = {}
541
+ inputs = data.get("inputs", [])
542
+ if isinstance(inputs, list):
543
+ for input_spec in inputs:
544
+ if isinstance(input_spec, dict) and "name" in input_spec and "default" in input_spec:
545
+ defaults[_sanitize_variable_name(str(input_spec["name"]))] = input_spec["default"]
546
+ elif isinstance(inputs, dict):
547
+ for name, input_def in inputs.items():
548
+ if isinstance(input_def, dict) and "default" in input_def:
549
+ defaults[_sanitize_variable_name(str(name))] = input_def["default"]
550
+ return defaults
551
+
552
+
553
+ def _replace_input_defaults_with_placeholders(data: dict[str, Any]) -> dict[str, Any]:
554
+ """Replace top-level input defaults with Jinja2 placeholders."""
555
+
556
+ modified = copy.deepcopy(data)
557
+ inputs = modified.get("inputs", [])
558
+ if isinstance(inputs, list):
559
+ for input_spec in inputs:
560
+ if isinstance(input_spec, dict) and "name" in input_spec and "default" in input_spec:
561
+ var_name = _sanitize_variable_name(str(input_spec["name"]))
562
+ input_spec["default"] = "{{ " + var_name + " }}"
563
+ elif isinstance(inputs, dict):
564
+ for name, input_def in inputs.items():
565
+ if isinstance(input_def, dict) and "default" in input_def:
566
+ var_name = _sanitize_variable_name(str(name))
567
+ input_def["default"] = "{{ " + var_name + " }}"
568
+ return modified
569
+
570
+
571
+ def _sanitize_variable_name(name: str) -> str:
572
+ """Convert a name to a valid Jinja2 variable name."""
573
+
574
+ sanitized = re.sub(r"[^\w]", "_", name.lower())
575
+ sanitized = re.sub(r"_+", "_", sanitized)
576
+ return sanitized.strip("_")
577
+
578
+
579
+ def _convert_templateid_to_includes(
580
+ yaml_text: str,
581
+ subtemplates: Mapping[str, Mapping[str, Any]],
582
+ base_name: str,
583
+ ) -> str:
584
+ """Convert templateId markers in YAML to Jinja2 include syntax."""
585
+
586
+ def replace_with_include(match: re.Match[str], template_file: str) -> str:
587
+ name_value = match.group(1).strip()
588
+ if not (name_value.startswith("'") or name_value.startswith('"')):
589
+ name_value = f"'{name_value}'"
590
+ return f"{{% with _subgraph_name = {name_value} %}}{{% include '{template_file}' %}}{{% endwith %}}"
591
+
592
+ for subtemplate_id in subtemplates:
593
+ template_filename = f"{base_name}_{subtemplate_id}.yaml.j2"
594
+ yaml_text = re.sub(
595
+ rf"^\s*templateId:\s*{re.escape(subtemplate_id)}\s*\n\s*_subgraph_name:\s*(.+?)\s*$",
596
+ lambda m: replace_with_include(m, template_filename),
597
+ yaml_text,
598
+ flags=re.MULTILINE,
599
+ )
600
+ return yaml_text
601
+
602
+
603
+ def _build_subgraph_processing_queue(data: dict[str, Any]) -> list[tuple[int, str]]:
604
+ """Build subgraph paths ordered deepest-first."""
605
+
606
+ results: list[tuple[int, str]] = []
607
+ stack: list[tuple[dict[str, Any], str, int]] = [(data, "", 0)]
608
+
609
+ while stack:
610
+ spec, current_path, depth = stack.pop()
611
+ spec_name = spec.get("name", "unnamed")
612
+ path = f"{current_path}{PATH_SEPARATOR}{spec_name}" if current_path else str(spec_name)
613
+ results.append((depth, path))
614
+
615
+ tasks = spec.get("implementation", {}).get("graph", {}).get("tasks", {})
616
+ for task_name, task_data in tasks.items():
617
+ if not isinstance(task_data, dict):
618
+ continue
619
+ component_ref = task_data.get("componentRef")
620
+ if not isinstance(component_ref, dict):
621
+ continue
622
+ nested_spec = component_ref.get("spec", {})
623
+ if utils.is_subgraph_spec(nested_spec):
624
+ stack.append((nested_spec, f"{path}{PATH_SEPARATOR}{task_name}", depth + 1))
625
+
626
+ return sorted(results, key=lambda item: (-item[0], item[1]))
627
+
628
+
629
+ def _get_task_component_ref(spec: dict[str, Any], task_name: str) -> tuple[dict[str, Any], dict[str, Any]]:
630
+ """Return ``(componentRef, nested_spec)`` for a task in a spec graph."""
631
+
632
+ tasks = spec.get("implementation", {}).get("graph", {}).get("tasks", {})
633
+ task_data = tasks.get(task_name, {})
634
+ component_ref = task_data.get("componentRef", {})
635
+ nested_spec = component_ref.get("spec", {}) if isinstance(component_ref, dict) else {}
636
+ return component_ref, nested_spec
637
+
638
+
639
def _get_subgraph_by_path(data: dict[str, Any], path: str) -> tuple[dict[str, Any], dict[str, Any]] | None:
    """Find the subgraph addressed by a queue *path* inside *data*.

    A path alternates spec names and task names
    (``root|task|sub|task|sub...``). Returns ``(componentRef, spec)`` of the
    task that owns the final subgraph, or ``None`` when the path is too short
    to address a subgraph or no nested spec is found.
    """

    segments = path.split(PATH_SEPARATOR)
    if len(segments) < 3:
        return None

    # Odd positions are task names; walk all of them except the owning task.
    scope = data
    for hop in segments[1:-2:2]:
        scope = _get_task_component_ref(scope, hop)[1]

    owner_ref, nested = _get_task_component_ref(scope, segments[-2])
    return (owner_ref, nested) if nested else None
655
+
656
+
657
+ def _spec_hash(spec: dict[str, Any]) -> str:
658
+ """Compute a hash key for a spec dictionary, ignoring top-level name."""
659
+
660
+ spec_for_hash = {k: v for k, v in spec.items() if k != "name"}
661
+ return json.dumps(spec_for_hash, sort_keys=True)
662
+
663
+
664
def _process_subgraphs_to_subtemplates(
    data: dict[str, Any],
    logger: Logger | None = None,
) -> tuple[dict[str, Any], dict[str, dict[str, Any]]]:
    """Replace nested subgraph specs with template markers and collect subtemplates.

    Works on a deep copy of *data*. Subgraphs are visited deepest-first; specs
    that are identical apart from their top-level name share one subtemplate.
    Each subgraph's ``componentRef.spec`` becomes a
    ``{"templateId", "_subgraph_name"}`` marker.

    Returns:
        ``(modified copy, {subtemplate id: {"spec": ..., "path": ...}})``.
    """

    log = logger if logger else get_default_logger()
    working = copy.deepcopy(data)

    ids_by_hash: dict[str, str] = {}
    records: dict[str, dict[str, Any]] = {}

    for depth, path in _build_subgraph_processing_queue(working):
        if depth == 0:
            continue

        located = _get_subgraph_by_path(working, path)
        if not located:
            continue
        component_ref, spec = located

        spec_key = _spec_hash(spec)
        spec_name = spec.get("name", "unnamed")
        subtemplate_id = ids_by_hash.get(spec_key)
        if subtemplate_id is not None:
            log.info(f" ♻️ Reusing {subtemplate_id} for '{spec_name}'")
        else:
            subtemplate_id = f"subtemplate_{len(ids_by_hash)}"
            template_spec = copy.deepcopy(spec)
            if "name" in template_spec:
                # The concrete name is supplied by the including template.
                template_spec["name"] = "{{ _subgraph_name }}"
            ids_by_hash[spec_key] = subtemplate_id
            records[subtemplate_id] = {"spec": template_spec, "path": path}
            log.info(f" 📦 Created {subtemplate_id} for '{spec_name}'")

        component_ref["spec"] = {"templateId": subtemplate_id, "_subgraph_name": spec_name}

    return working, records
706
+
707
+
708
# Names exported by ``from ... import *``. The underscore-prefixed helpers are
# listed explicitly, so they are exported as well.
__all__ = [
    "DehydrateChoice",
    "Jinja2ExportResult",
    "PipelineDehydrator",
    "PATH_SEPARATOR",
    "_build_subgraph_processing_queue",
    "_convert_templateid_to_includes",
    "_extract_input_defaults",
    "_get_subgraph_by_path",
    "_process_subgraphs_to_subtemplates",
    "_replace_input_defaults_with_placeholders",
    "_sanitize_variable_name",
]