tactus-0.31.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. tactus/__init__.py +49 -0
  2. tactus/adapters/__init__.py +9 -0
  3. tactus/adapters/broker_log.py +76 -0
  4. tactus/adapters/cli_hitl.py +189 -0
  5. tactus/adapters/cli_log.py +223 -0
  6. tactus/adapters/cost_collector_log.py +56 -0
  7. tactus/adapters/file_storage.py +367 -0
  8. tactus/adapters/http_callback_log.py +109 -0
  9. tactus/adapters/ide_log.py +71 -0
  10. tactus/adapters/lua_tools.py +336 -0
  11. tactus/adapters/mcp.py +289 -0
  12. tactus/adapters/mcp_manager.py +196 -0
  13. tactus/adapters/memory.py +53 -0
  14. tactus/adapters/plugins.py +419 -0
  15. tactus/backends/http_backend.py +58 -0
  16. tactus/backends/model_backend.py +35 -0
  17. tactus/backends/pytorch_backend.py +110 -0
  18. tactus/broker/__init__.py +12 -0
  19. tactus/broker/client.py +247 -0
  20. tactus/broker/protocol.py +183 -0
  21. tactus/broker/server.py +1123 -0
  22. tactus/broker/stdio.py +12 -0
  23. tactus/cli/__init__.py +7 -0
  24. tactus/cli/app.py +2245 -0
  25. tactus/cli/commands/__init__.py +0 -0
  26. tactus/core/__init__.py +32 -0
  27. tactus/core/config_manager.py +790 -0
  28. tactus/core/dependencies/__init__.py +14 -0
  29. tactus/core/dependencies/registry.py +180 -0
  30. tactus/core/dsl_stubs.py +2117 -0
  31. tactus/core/exceptions.py +66 -0
  32. tactus/core/execution_context.py +480 -0
  33. tactus/core/lua_sandbox.py +508 -0
  34. tactus/core/message_history_manager.py +236 -0
  35. tactus/core/mocking.py +286 -0
  36. tactus/core/output_validator.py +291 -0
  37. tactus/core/registry.py +499 -0
  38. tactus/core/runtime.py +2907 -0
  39. tactus/core/template_resolver.py +142 -0
  40. tactus/core/yaml_parser.py +301 -0
  41. tactus/docker/Dockerfile +61 -0
  42. tactus/docker/entrypoint.sh +69 -0
  43. tactus/dspy/__init__.py +39 -0
  44. tactus/dspy/agent.py +1144 -0
  45. tactus/dspy/broker_lm.py +181 -0
  46. tactus/dspy/config.py +212 -0
  47. tactus/dspy/history.py +196 -0
  48. tactus/dspy/module.py +405 -0
  49. tactus/dspy/prediction.py +318 -0
  50. tactus/dspy/signature.py +185 -0
  51. tactus/formatting/__init__.py +7 -0
  52. tactus/formatting/formatter.py +437 -0
  53. tactus/ide/__init__.py +9 -0
  54. tactus/ide/coding_assistant.py +343 -0
  55. tactus/ide/server.py +2223 -0
  56. tactus/primitives/__init__.py +49 -0
  57. tactus/primitives/control.py +168 -0
  58. tactus/primitives/file.py +229 -0
  59. tactus/primitives/handles.py +378 -0
  60. tactus/primitives/host.py +94 -0
  61. tactus/primitives/human.py +342 -0
  62. tactus/primitives/json.py +189 -0
  63. tactus/primitives/log.py +187 -0
  64. tactus/primitives/message_history.py +157 -0
  65. tactus/primitives/model.py +163 -0
  66. tactus/primitives/procedure.py +564 -0
  67. tactus/primitives/procedure_callable.py +318 -0
  68. tactus/primitives/retry.py +155 -0
  69. tactus/primitives/session.py +152 -0
  70. tactus/primitives/state.py +182 -0
  71. tactus/primitives/step.py +209 -0
  72. tactus/primitives/system.py +93 -0
  73. tactus/primitives/tool.py +375 -0
  74. tactus/primitives/tool_handle.py +279 -0
  75. tactus/primitives/toolset.py +229 -0
  76. tactus/protocols/__init__.py +38 -0
  77. tactus/protocols/chat_recorder.py +81 -0
  78. tactus/protocols/config.py +97 -0
  79. tactus/protocols/cost.py +31 -0
  80. tactus/protocols/hitl.py +71 -0
  81. tactus/protocols/log_handler.py +27 -0
  82. tactus/protocols/models.py +355 -0
  83. tactus/protocols/result.py +33 -0
  84. tactus/protocols/storage.py +90 -0
  85. tactus/providers/__init__.py +13 -0
  86. tactus/providers/base.py +92 -0
  87. tactus/providers/bedrock.py +117 -0
  88. tactus/providers/google.py +105 -0
  89. tactus/providers/openai.py +98 -0
  90. tactus/sandbox/__init__.py +63 -0
  91. tactus/sandbox/config.py +171 -0
  92. tactus/sandbox/container_runner.py +1099 -0
  93. tactus/sandbox/docker_manager.py +433 -0
  94. tactus/sandbox/entrypoint.py +227 -0
  95. tactus/sandbox/protocol.py +213 -0
  96. tactus/stdlib/__init__.py +10 -0
  97. tactus/stdlib/io/__init__.py +13 -0
  98. tactus/stdlib/io/csv.py +88 -0
  99. tactus/stdlib/io/excel.py +136 -0
  100. tactus/stdlib/io/file.py +90 -0
  101. tactus/stdlib/io/fs.py +154 -0
  102. tactus/stdlib/io/hdf5.py +121 -0
  103. tactus/stdlib/io/json.py +109 -0
  104. tactus/stdlib/io/parquet.py +83 -0
  105. tactus/stdlib/io/tsv.py +88 -0
  106. tactus/stdlib/loader.py +274 -0
  107. tactus/stdlib/tac/tactus/tools/done.tac +33 -0
  108. tactus/stdlib/tac/tactus/tools/log.tac +50 -0
  109. tactus/testing/README.md +273 -0
  110. tactus/testing/__init__.py +61 -0
  111. tactus/testing/behave_integration.py +380 -0
  112. tactus/testing/context.py +486 -0
  113. tactus/testing/eval_models.py +114 -0
  114. tactus/testing/evaluation_runner.py +222 -0
  115. tactus/testing/evaluators.py +634 -0
  116. tactus/testing/events.py +94 -0
  117. tactus/testing/gherkin_parser.py +134 -0
  118. tactus/testing/mock_agent.py +315 -0
  119. tactus/testing/mock_dependencies.py +234 -0
  120. tactus/testing/mock_hitl.py +171 -0
  121. tactus/testing/mock_registry.py +168 -0
  122. tactus/testing/mock_tools.py +133 -0
  123. tactus/testing/models.py +115 -0
  124. tactus/testing/pydantic_eval_runner.py +508 -0
  125. tactus/testing/steps/__init__.py +13 -0
  126. tactus/testing/steps/builtin.py +902 -0
  127. tactus/testing/steps/custom.py +69 -0
  128. tactus/testing/steps/registry.py +68 -0
  129. tactus/testing/test_runner.py +489 -0
  130. tactus/tracing/__init__.py +5 -0
  131. tactus/tracing/trace_manager.py +417 -0
  132. tactus/utils/__init__.py +1 -0
  133. tactus/utils/cost_calculator.py +72 -0
  134. tactus/utils/model_pricing.py +132 -0
  135. tactus/utils/safe_file_library.py +502 -0
  136. tactus/utils/safe_libraries.py +234 -0
  137. tactus/validation/LuaLexerBase.py +66 -0
  138. tactus/validation/LuaParserBase.py +23 -0
  139. tactus/validation/README.md +224 -0
  140. tactus/validation/__init__.py +7 -0
  141. tactus/validation/error_listener.py +21 -0
  142. tactus/validation/generated/LuaLexer.interp +231 -0
  143. tactus/validation/generated/LuaLexer.py +5548 -0
  144. tactus/validation/generated/LuaLexer.tokens +124 -0
  145. tactus/validation/generated/LuaLexerBase.py +66 -0
  146. tactus/validation/generated/LuaParser.interp +173 -0
  147. tactus/validation/generated/LuaParser.py +6439 -0
  148. tactus/validation/generated/LuaParser.tokens +124 -0
  149. tactus/validation/generated/LuaParserBase.py +23 -0
  150. tactus/validation/generated/LuaParserVisitor.py +118 -0
  151. tactus/validation/generated/__init__.py +7 -0
  152. tactus/validation/grammar/LuaLexer.g4 +123 -0
  153. tactus/validation/grammar/LuaParser.g4 +178 -0
  154. tactus/validation/semantic_visitor.py +817 -0
  155. tactus/validation/validator.py +157 -0
  156. tactus-0.31.2.dist-info/METADATA +1809 -0
  157. tactus-0.31.2.dist-info/RECORD +160 -0
  158. tactus-0.31.2.dist-info/WHEEL +4 -0
  159. tactus-0.31.2.dist-info/entry_points.txt +2 -0
  160. tactus-0.31.2.dist-info/licenses/LICENSE +21 -0
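The listing above mirrors the wheel's RECORD file. To reproduce it locally, a wheel can be opened as an ordinary zip archive; a minimal Python sketch (the filename comes from this page, the local download path is an assumption):

    import zipfile

    # Assumed to be downloaded into the current directory
    WHEEL = "tactus-0.31.2-py3-none-any.whl"

    with zipfile.ZipFile(WHEEL) as wf:
        for info in wf.infolist():
            # One entry per packaged file, matching the 160 files listed above
            print(f"{info.filename} ({info.file_size} bytes)")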
tactus/cli/app.py ADDED
@@ -0,0 +1,2245 @@
+"""
+Tactus CLI Application.
+
+Main entry point for the Tactus command-line interface.
+Provides commands for running, validating, and testing workflows.
+"""
+
+# Disable Pydantic plugins for PyInstaller builds
+# This prevents logfire (and other plugins) from being loaded via Pydantic's plugin system
+# which causes errors when trying to inspect source code in frozen apps
+import os
+
+os.environ["PYDANTIC_DISABLE_PLUGINS"] = "1"
+
+import asyncio
+import json
+from pathlib import Path
+from typing import Any, Optional
+import logging
+import sys
+
+import typer
+from rich.console import Console
+from rich.logging import RichHandler
+from rich.panel import Panel
+from rich.prompt import Prompt, Confirm
+from rich.table import Table
+from tactus.core import TactusRuntime
+from tactus.core.yaml_parser import ProcedureYAMLParser, ProcedureConfigError
+from tactus.validation import TactusValidator, ValidationMode
+from tactus.formatting import TactusFormatter, FormattingError
+from tactus.adapters.memory import MemoryStorage
+from tactus.adapters.file_storage import FileStorage
+from tactus.adapters.cli_hitl import CLIHITLHandler
+
+# Setup rich console for pretty output
+console = Console()
+
+# Create Typer app
+app = typer.Typer(
+    name="tactus", help="Tactus - Workflow automation with Lua DSL", add_completion=False
+)
+
+
+@app.callback(invoke_without_command=True)
+def main_callback(
+    ctx: typer.Context,
+    version: bool = typer.Option(
+        False,
+        "--version",
+        "-V",
+        help="Show version and exit",
+        is_eager=True,
+    ),
+):
+    """Tactus CLI callback for global options."""
+    if version:
+        from tactus import __version__
+
+        console.print(f"Tactus version: [bold]{__version__}[/bold]")
+        raise typer.Exit()
+
+    # If no subcommand was invoked and version flag not set, show help
+    if ctx.invoked_subcommand is None:
+        console.print(ctx.get_help())
+        raise typer.Exit()
+
+
+def load_tactus_config():
+    """
+    Load Tactus configuration from standard config locations.
+
+    Loads (lowest to highest precedence):
+    - system config (e.g. /etc/tactus/config.yml)
+    - user config (e.g. ~/.tactus/config.yml)
+    - project config (./.tactus/config.yml)
+
+    Environment variables always win over config files (we only set vars that don't already exist).
+
+    Returns:
+        dict: Configuration dictionary, or empty dict if no config found
+    """
+    try:
+        from tactus.core.config_manager import ConfigManager
+        import json
+
+        config_mgr = ConfigManager()
+
+        configs = []
+        for system_path in config_mgr._get_system_config_paths():
+            if system_path.exists():
+                cfg = config_mgr._load_yaml_file(system_path)
+                if cfg:
+                    configs.append(cfg)
+
+        for user_path in config_mgr._get_user_config_paths():
+            if user_path.exists():
+                cfg = config_mgr._load_yaml_file(user_path)
+                if cfg:
+                    configs.append(cfg)
+
+        project_path = Path.cwd() / ".tactus" / "config.yml"
+        if project_path.exists():
+            cfg = config_mgr._load_yaml_file(project_path)
+            if cfg:
+                configs.append(cfg)
+
+        merged = config_mgr._merge_configs(configs) if configs else {}
+
+        # Only set env vars that were not already set by the user/process.
+        existing_env = set(os.environ.keys())
+
+        for key, value in merged.items():
+            if key == "mcp_servers":
+                continue
+
+            if isinstance(value, (str, int, float, bool)):
+                env_key = key.upper()
+                if env_key not in existing_env:
+                    os.environ[env_key] = str(value)
+            elif isinstance(value, list):
+                env_key = key.upper()
+                if env_key not in existing_env:
+                    os.environ[env_key] = json.dumps(value)
+            elif isinstance(value, dict):
+                for nested_key, nested_value in value.items():
+                    if isinstance(nested_value, (str, int, float, bool)):
+                        env_key = f"{key.upper()}_{nested_key.upper()}"
+                        if env_key not in existing_env:
+                            os.environ[env_key] = str(nested_value)
+
+        return merged
+    except Exception as e:
+        logging.debug(f"Could not load Tactus config: {e}")
+        return {}
+
+
+_LOG_LEVELS = {
+    "debug": logging.DEBUG,
+    "info": logging.INFO,
+    "warning": logging.WARNING,
+    "warn": logging.WARNING,
+    "error": logging.ERROR,
+    "critical": logging.CRITICAL,
+}
+
+_LOG_FORMATS = {"rich", "terminal", "raw"}
+
+
+class _TerminalLogHandler(logging.Handler):
+    """Minimal, high-signal terminal logger (no timestamps/levels)."""
+
+    def __init__(self, console: Console):
+        super().__init__()
+        self._console = console
+        self.setFormatter(logging.Formatter("%(message)s"))
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            message = self.format(record)
+
+            # Make procedure-level logs the most prominent.
+            if record.name.startswith("procedure"):
+                style = "bold"
+            elif record.levelno >= logging.ERROR:
+                style = "bold red"
+            elif record.levelno >= logging.WARNING:
+                style = "yellow"
+            elif record.levelno <= logging.DEBUG:
+                style = "dim"
+            else:
+                style = ""
+
+            self._console.print(message, style=style, markup=False, highlight=False)
+        except Exception:
+            self.handleError(record)
+
+
+def setup_logging(
+    verbose: bool = False,
+    log_level: Optional[str] = None,
+    log_format: str = "rich",
+) -> None:
+    """Setup CLI logging (level + format)."""
+    if log_level is None:
+        level = logging.DEBUG if verbose else logging.INFO
+    else:
+        key = str(log_level).strip().lower()
+        if key not in _LOG_LEVELS:
+            raise typer.BadParameter(
+                f"Invalid --log-level '{log_level}'. "
+                f"Use one of: {', '.join(sorted(_LOG_LEVELS.keys()))}"
+            )
+        level = _LOG_LEVELS[key]
+
+    fmt = (log_format or "rich").strip().lower()
+    if fmt not in _LOG_FORMATS:
+        raise typer.BadParameter(
+            f"Invalid --log-format '{log_format}'. Use one of: {', '.join(sorted(_LOG_FORMATS))}"
+        )
+
+    # Default: rich logs (group repeated timestamps).
+    if fmt == "rich":
+        handler: logging.Handler = RichHandler(
+            console=console,
+            show_path=False,
+            rich_tracebacks=True,
+            omit_repeated_times=True,
+        )
+        handler.setFormatter(logging.Formatter("%(message)s"))
+        logging.basicConfig(level=level, format="%(message)s", handlers=[handler], force=True)
+        return
+
+    # Raw logs: one line per entry, CloudWatch-friendly.
+    if fmt == "raw":
+        handler = logging.StreamHandler(stream=sys.stderr)
+        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s"))
+        logging.basicConfig(level=level, handlers=[handler], force=True)
+        return
+
+    # Terminal logs: no timestamps/levels, color by signal.
+    handler = _TerminalLogHandler(console)
+    logging.basicConfig(level=level, handlers=[handler], force=True)
+
+
+def _parse_value(value_str: str, field_type: str) -> Any:
+    """
+    Parse a string value into the appropriate type.
+
+    Args:
+        value_str: The string value to parse
+        field_type: The expected type (string, number, boolean, array, object)
+
+    Returns:
+        The parsed value in the appropriate type
+    """
+    if field_type == "boolean":
+        return value_str.lower() in ("true", "yes", "1", "y")
+    elif field_type == "number":
+        try:
+            if "." in value_str:
+                return float(value_str)
+            return int(value_str)
+        except ValueError:
+            return 0
+    elif field_type == "array":
+        try:
+            return json.loads(value_str)
+        except json.JSONDecodeError:
+            # Try to parse as comma-separated values
+            if value_str.strip():
+                return [v.strip() for v in value_str.split(",")]
+            return []
+    elif field_type == "object":
+        try:
+            return json.loads(value_str)
+        except json.JSONDecodeError:
+            return {}
+    else:
+        return value_str
+
+
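As a quick reference for the coercion rules above, a doctest-style sketch (assuming _parse_value is importable from tactus.cli.app outside the CLI):

    from tactus.cli.app import _parse_value

    assert _parse_value("true", "boolean") is True
    assert _parse_value("3.5", "number") == 3.5
    assert _parse_value("abc", "number") == 0            # silent fallback for bad numbers
    assert _parse_value("[1, 2]", "array") == [1, 2]     # JSON parsed first
    assert _parse_value("a, b", "array") == ["a", "b"]   # comma-separated fallback
    assert _parse_value("{bad", "object") == {}          # invalid JSON -> empty object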
+def _prompt_for_inputs(console: Console, input_schema: dict, provided_params: dict) -> dict:
+    """
+    Interactively prompt user for procedure inputs.
+
+    Displays all inputs with their types, descriptions, and defaults,
+    then prompts the user to confirm or modify each value.
+
+    Args:
+        console: Rich Console for output
+        input_schema: Dict of input name -> field definition
+        provided_params: Already provided --param values
+
+    Returns:
+        Dict of resolved input values
+    """
+    if not input_schema:
+        return provided_params.copy()
+
+    console.print(Panel("[bold]Procedure Inputs[/bold]", style="blue"))
+
+    # Display input schema summary
+    table = Table(title="Input Parameters")
+    table.add_column("Name", style="cyan")
+    table.add_column("Type", style="magenta")
+    table.add_column("Required", style="yellow")
+    table.add_column("Default", style="green")
+    table.add_column("Current", style="blue")
+
+    for name, field in input_schema.items():
+        required = "Yes" if field.get("required") else "No"
+        default = str(field.get("default", "-")) if field.get("default") is not None else "-"
+        current = str(provided_params.get(name, "-")) if name in provided_params else "-"
+        table.add_row(name, field.get("type", "string"), required, default, current)
+
+    console.print(table)
+    console.print()
+
+    # Prompt for each input
+    resolved = {}
+    for name, field in input_schema.items():
+        field_type = field.get("type", "string")
+        description = field.get("description", "")
+        required = field.get("required", False)
+        enum_values = field.get("enum")
+
+        # Determine current value (provided > default)
+        if name in provided_params:
+            current_value = provided_params[name]
+        elif field.get("default") is not None:
+            current_value = field.get("default")
+        else:
+            current_value = None
+
+        # Build prompt message
+        prompt_msg = f"[cyan]{name}[/cyan]"
+        if description:
+            prompt_msg += f" [dim]({description})[/dim]"
+        if required:
+            prompt_msg += " [yellow]*[/yellow]"
+
+        # Handle different types
+        if field_type == "boolean":
+            default_bool = bool(current_value) if current_value is not None else False
+            value = Confirm.ask(prompt_msg, default=default_bool, console=console)
+
+        elif enum_values and isinstance(enum_values, list):
+            # Show enum options
+            console.print(f"\n{prompt_msg}")
+            console.print("[dim]Options:[/dim]")
+            for i, opt in enumerate(enum_values, 1):
+                console.print(f" {i}. [cyan]{opt}[/cyan]")
+
+            # Find default index
+            default_idx = "1"
+            if current_value in enum_values:
+                default_idx = str(enum_values.index(current_value) + 1)
+
+            while True:
+                choice_str = Prompt.ask(
+                    "Select option (number or value)",
+                    default=default_idx,
+                    console=console,
+                )
+                # Try as number first
+                try:
+                    choice = int(choice_str)
+                    if 1 <= choice <= len(enum_values):
+                        value = enum_values[choice - 1]
+                        break
+                except ValueError:
+                    # Try as direct value
+                    if choice_str in enum_values:
+                        value = choice_str
+                        break
+                console.print(
+                    f"[red]Invalid choice. Enter 1-{len(enum_values)} or a valid option.[/red]"
+                )
+
+        elif field_type == "array":
+            # Format default as JSON string
+            if isinstance(current_value, list):
+                default_str = json.dumps(current_value)
+            elif current_value is not None:
+                default_str = str(current_value)
+            else:
+                default_str = "[]"
+
+            console.print(f"\n{prompt_msg}")
+            console.print("[dim]Enter JSON array (e.g., [1, 2, 3]) or comma-separated values[/dim]")
+            value_str = Prompt.ask("Value", default=default_str, console=console)
+            value = _parse_value(value_str, "array")
+
+        elif field_type == "object":
+            # Format default as JSON string
+            if isinstance(current_value, dict):
+                default_str = json.dumps(current_value)
+            elif current_value is not None:
+                default_str = str(current_value)
+            else:
+                default_str = "{}"
+
+            console.print(f"\n{prompt_msg}")
+            console.print('[dim]Enter JSON object (e.g., {"key": "value"})[/dim]')
+            value_str = Prompt.ask("Value", default=default_str, console=console)
+            value = _parse_value(value_str, "object")
+
+        elif field_type == "number":
+            default_str = str(current_value) if current_value is not None else ""
+            value_str = Prompt.ask(prompt_msg, default=default_str, console=console)
+            value = _parse_value(value_str, "number")
+
+        else:
+            # String or unknown type
+            default_str = str(current_value) if current_value is not None else ""
+            value = Prompt.ask(prompt_msg, default=default_str, console=console)
+
+        resolved[name] = value
+
+    console.print()
+    return resolved
+
+
+def _check_missing_required_inputs(input_schema: dict, provided_params: dict) -> list:
+    """
+    Check for missing required inputs that have no defaults.
+
+    Args:
+        input_schema: Dict of input name -> field definition
+        provided_params: Provided parameter values
+
+    Returns:
+        List of missing required input names
+    """
+    missing = []
+    for name, field in input_schema.items():
+        if field.get("required", False):
+            if name not in provided_params and field.get("default") is None:
+                missing.append(name)
+    return missing
+
+
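A small illustration of the rule above: an input counts as missing only when it is required, not provided, and has no default (hypothetical schema, assuming the module imports cleanly):

    from tactus.cli.app import _check_missing_required_inputs

    schema = {
        "task": {"type": "string", "required": True},                 # missing
        "count": {"type": "number", "required": True, "default": 5},  # has a default
        "note": {"type": "string"},                                   # optional
    }
    assert _check_missing_required_inputs(schema, {}) == ["task"]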
+@app.command()
+def run(
+    workflow_file: Path = typer.Argument(..., help="Path to workflow file (.tac)"),
+    storage: str = typer.Option("memory", help="Storage backend: memory, file"),
+    storage_path: Optional[Path] = typer.Option(None, help="Path for file storage"),
+    openai_api_key: Optional[str] = typer.Option(
+        None, envvar="OPENAI_API_KEY", help="OpenAI API key"
+    ),
+    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
+    log_level: Optional[str] = typer.Option(
+        None, "--log-level", help="Log level: debug, info, warning, error, critical"
+    ),
+    log_format: str = typer.Option(
+        "rich", "--log-format", help="Log format: rich (default), terminal, raw"
+    ),
+    param: Optional[list[str]] = typer.Option(None, help="Parameters in format key=value"),
+    interactive: bool = typer.Option(
+        False, "--interactive", "-i", help="Interactively prompt for all inputs"
+    ),
+    mock_all: bool = typer.Option(
+        False, "--mock-all", help="Mock all tools (use mock responses for all tool calls)"
+    ),
+    real_all: bool = typer.Option(
+        False, "--real-all", help="Use real implementations for all tools (disable all mocks)"
+    ),
+    mock: Optional[list[str]] = typer.Option(None, "--mock", help="Mock specific tool(s) by name"),
+    real: Optional[list[str]] = typer.Option(
+        None, "--real", help="Use real implementation for specific tool(s)"
+    ),
+    sandbox: Optional[bool] = typer.Option(
+        None,
+        "--sandbox/--no-sandbox",
+        help="Run in Docker sandbox (default: required unless --no-sandbox). "
+        "Use --no-sandbox to run without isolation (security risk).",
+    ),
+    sandbox_broker: str = typer.Option(
+        "tcp",
+        "--sandbox-broker",
+        help="Broker transport for sandbox runtime: tcp (default), tls, or stdio (deprecated due to buffering issues).",
+    ),
+    sandbox_network: Optional[str] = typer.Option(
+        None,
+        "--sandbox-network",
+        help="Docker network mode for sandbox container (default: none for stdio; bridge for tcp/tls).",
+    ),
+    sandbox_broker_host: Optional[str] = typer.Option(
+        None,
+        "--sandbox-broker-host",
+        help="Broker hostname from inside the sandbox container (tcp/tls only).",
+    ),
+):
+    """
+    Run a Tactus workflow.
+
+    Examples:
+
+        # Run with memory storage
+        tactus run workflow.tac
+
+        # Run with file storage
+        tactus run workflow.tac --storage file --storage-path ./data
+
+        # Pass parameters
+        tactus run workflow.tac --param task="Analyze data" --param count=5
+
+        # Interactive mode - prompt for all inputs
+        tactus run workflow.tac -i
+
+        # Mock all tools (useful for testing without real API calls)
+        tactus run workflow.tac --mock-all
+
+        # Mock specific tools
+        tactus run workflow.tac --mock search --mock api_call
+
+        # Use real implementation for specific tools while mocking others
+        tactus run workflow.tac --mock-all --real done
+    """
+    setup_logging(verbose=verbose, log_level=log_level, log_format=log_format)
+
+    # Check if file exists
+    if not workflow_file.exists():
+        console.print(f"[red]Error:[/red] Workflow file not found: {workflow_file}")
+        raise typer.Exit(1)
+
+    # Determine format based on extension
+    file_format = "lua" if workflow_file.suffix in [".tac", ".lua"] else "yaml"
+
+    # Read workflow file
+    source_content = workflow_file.read_text()
+
+    # For Lua DSL files, extract input schema first
+    input_schema = {}
+    if file_format == "lua":
+        try:
+            validator = TactusValidator()
+            validation_result = validator.validate(source_content, ValidationMode.QUICK)
+            if validation_result.registry:
+                input_schema = validation_result.registry.input_schema or {}
+        except Exception as e:
+            # If validation fails, we'll continue without input schema
+            if verbose:
+                console.print(f"[dim]Warning: Could not extract input schema: {e}[/dim]")
+
+    # Parse parameters from CLI with type information from schema
+    context = {}
+    if param:
+        for p in param:
+            if "=" not in p:
+                console.print(
+                    f"[red]Error:[/red] Invalid parameter format: {p} (expected key=value)"
+                )
+                raise typer.Exit(1)
+            key, value = p.split("=", 1)
+
+            # Use type information from schema if available
+            if input_schema and key in input_schema:
+                field_def = input_schema[key]
+                if isinstance(field_def, dict):
+                    field_type = field_def.get("type", "string")
+                    context[key] = _parse_value(value, field_type)
+                    if verbose:
+                        console.print(
+                            f"[dim]Parsed {key} as {field_type}: {context[key]} (type: {type(context[key]).__name__})[/dim]"
+                        )
+                else:
+                    # Fallback to JSON parsing
+                    try:
+                        context[key] = json.loads(value)
+                    except json.JSONDecodeError:
+                        context[key] = value
+            else:
+                # No schema info, try to parse JSON values
+                try:
+                    context[key] = json.loads(value)
+                    if verbose:
+                        console.print(
+                            f"[dim]JSON parsed {key}: {context[key]} (type: {type(context[key]).__name__})[/dim]"
+                        )
+                except json.JSONDecodeError:
+                    context[key] = value
+                    if verbose:
+                        console.print(f"[dim]String parsed {key}: {context[key]}[/dim]")
+
+    # Handle interactive mode or missing required inputs
+    if input_schema:
+        missing_required = _check_missing_required_inputs(input_schema, context)
+
+        if interactive:
+            # Interactive mode: prompt for all inputs
+            context = _prompt_for_inputs(console, input_schema, context)
+        elif missing_required:
+            # Missing required inputs - prompt for them
+            console.print(
+                f"[yellow]Missing required inputs: {', '.join(missing_required)}[/yellow]\n"
+            )
+            context = _prompt_for_inputs(console, input_schema, context)
+
+    # Setup storage backend
+    if storage == "memory":
+        storage_backend = MemoryStorage()
+    elif storage == "file":
+        if not storage_path:
+            storage_path = Path.cwd() / ".tac" / "storage"
+        else:
+            # Ensure storage_path is a directory path, not a file path
+            storage_path = Path(storage_path)
+            if storage_path.is_file():
+                storage_path = storage_path.parent
+        storage_backend = FileStorage(storage_dir=str(storage_path))
+    else:
+        console.print(f"[red]Error:[/red] Unknown storage backend: {storage}")
+        raise typer.Exit(1)
+
+    # Setup HITL handler
+    hitl_handler = CLIHITLHandler(console=console)
+
+    # Load configuration cascade
+    from tactus.core.config_manager import ConfigManager
+
+    config_manager = ConfigManager()
+    merged_config = config_manager.load_cascade(workflow_file)
+
+    # CLI arguments override config values
+    # Get OpenAI API key: CLI param > config > environment
+    api_key = (
+        openai_api_key or merged_config.get("openai_api_key") or os.environ.get("OPENAI_API_KEY")
+    )
+
+    # Get tool paths from merged config
+    tool_paths = merged_config.get("tool_paths")
+
+    # Get MCP servers from merged config
+    mcp_servers = merged_config.get("mcp_servers", {})
+
+    # Handle sandbox mode
+    from tactus.sandbox import (
+        is_docker_available,
+        SandboxConfig,
+        ContainerRunner,
+    )
+
+    # Build sandbox config from merged config and CLI flag
+    sandbox_config_dict = merged_config.get("sandbox", {})
+    if sandbox is not None:
+        # CLI flag overrides config
+        sandbox_config_dict["enabled"] = sandbox
+    if sandbox_network is not None:
+        sandbox_config_dict["network"] = sandbox_network
+    if sandbox_broker_host is not None:
+        sandbox_config_dict["broker_host"] = sandbox_broker_host
+
+    sandbox_config_dict["broker_transport"] = sandbox_broker
+    if (
+        sandbox_network is None
+        and sandbox_broker in ("tcp", "tls")
+        and "network" not in sandbox_config_dict
+    ):
+        # Remote-mode requires container networking; default to bridge if user didn't specify.
+        sandbox_config_dict["network"] = "bridge"
+    sandbox_config = SandboxConfig(**sandbox_config_dict)
+
+    # Pass logging preferences through to the sandbox container so container stderr matches CLI UX.
+    sandbox_config.env.setdefault(
+        "TACTUS_LOG_LEVEL", str(log_level or ("debug" if verbose else "info"))
+    )
+    sandbox_config.env.setdefault("TACTUS_LOG_FORMAT", str(log_format))
+
+    # Check Docker availability
+    docker_available, docker_reason = is_docker_available()
+
+    # Determine if we should use sandbox
+    use_sandbox = sandbox_config.should_use_sandbox(docker_available)
+
+    if not use_sandbox:
+        if sandbox_config.is_explicitly_disabled():
+            # User explicitly disabled sandbox - show notice
+            console.print(
+                "[yellow][SANDBOX] Container isolation disabled (--no-sandbox or config).[/yellow]"
+            )
+            console.print("[yellow][SANDBOX] Proceeding without Docker isolation.[/yellow]")
+        elif not docker_available and not sandbox_config.should_error_if_unavailable():
+            # Sandbox is auto-mode (default): fall back when Docker is unavailable
+            console.print(
+                f"[yellow][SANDBOX] Docker not available ({docker_reason}); running without container isolation.[/yellow]"
+            )
+        elif sandbox_config.should_error_if_unavailable() and not docker_available:
+            # Sandbox required but Docker unavailable - ERROR
+            console.print(f"[red][SANDBOX ERROR] Docker not available: {docker_reason}[/red]")
+            console.print(
+                "[red][SANDBOX ERROR] Cannot run procedure without container isolation.[/red]"
+            )
+            console.print("[red][SANDBOX ERROR] Either:[/red]")
+            console.print("[red] - Start Docker Desktop / Docker daemon[/red]")
+            console.print(
+                "[red] - Use --no-sandbox flag to explicitly run without isolation (security risk)[/red]"
+            )
+            console.print(
+                "[red] - Set sandbox.enabled: false in config to permanently disable (security risk)[/red]"
+            )
+            raise typer.Exit(1)
+
+    # Note: CLI params have already been parsed and added to context above
+    # This section used to re-parse them, but that would override the
+    # properly JSON-parsed values with raw strings
+
+    # Create log handler for Rich formatting
+    from tactus.adapters.cli_log import CLILogHandler
+
+    log_handler = CLILogHandler(console)
+
+    # Suppress verbose runtime logging when using structured log handler
+    # This prevents duplicate output - we only want the clean structured logs
+    logging.getLogger("tactus.core.runtime").setLevel(logging.WARNING)
+    logging.getLogger("tactus.primitives").setLevel(logging.WARNING)
+
+    # Create runtime
+    procedure_id = f"cli-{workflow_file.stem}"
+    runtime = TactusRuntime(
+        procedure_id=procedure_id,
+        storage_backend=storage_backend,
+        hitl_handler=hitl_handler,
+        chat_recorder=None,  # No chat recording in CLI mode
+        mcp_server=None,  # Legacy parameter (deprecated)
+        mcp_servers=mcp_servers,  # New multi-server support
+        openai_api_key=api_key,
+        log_handler=log_handler,
+        tool_paths=tool_paths,
+        source_file_path=str(workflow_file),
+    )
+
+    # Set up mocking based on CLI flags
+    if mock_all or real_all or mock or real:
+        from tactus.core.mocking import MockManager
+
+        # Create and configure mock manager
+        mock_manager = MockManager()
+        runtime.mock_manager = mock_manager
+
+        # Handle global flags
+        if mock_all:
+            mock_manager.enable_mock()
+            runtime.mock_all_agents = True
+            console.print("[yellow]Mocking enabled for all tools[/yellow]")
+        elif real_all:
+            mock_manager.disable_mock()
+            console.print("[blue]Using real implementations for all tools[/blue]")
+
+        # Handle specific tool mocking
+        if mock:
+            for tool_name in mock:
+                # Register a simple mock that returns a placeholder response
+                from tactus.core.mocking import MockConfig
+
+                mock_manager.register_mock(
+                    tool_name,
+                    MockConfig(
+                        tool_name=tool_name,
+                        static_result={
+                            "mocked": True,
+                            "tool": tool_name,
+                            "message": f"Mock response for {tool_name}",
+                        },
+                    ),
+                )
+                mock_manager.enable_mock(tool_name)
+                console.print(f"[yellow]Mocking enabled for tool: {tool_name}[/yellow]")
+
+        # Handle specific tool real implementations
+        if real:
+            for tool_name in real:
+                mock_manager.disable_mock(tool_name)
+                console.print(f"[blue]Using real implementation for tool: {tool_name}[/blue]")
+
+    # Execute procedure
+    if use_sandbox:
+        console.print(
+            f"[blue]Running procedure in sandbox:[/blue] [bold]{workflow_file.name}[/bold] ({file_format} format)\n"
+        )
+    else:
+        console.print(
+            f"[blue]Running procedure:[/blue] [bold]{workflow_file.name}[/bold] ({file_format} format)\n"
+        )
+
+    try:
+        if use_sandbox:
+            # Host-side broker reads OpenAI credentials from the host process environment.
+            # Keep secrets OUT of the sandbox container by setting the env var only on the host.
+            if api_key:
+                os.environ["OPENAI_API_KEY"] = api_key
+
+            # Execute in Docker sandbox
+            runner = ContainerRunner(sandbox_config)
+            sandbox_result = asyncio.run(
+                runner.run(
+                    source=source_content,
+                    params=context,
+                    source_file_path=str(workflow_file),
+                    format=file_format,
+                )
+            )
+
+            # Convert sandbox result to the expected format
+            if sandbox_result.status.value == "success":
+                result = {
+                    "success": True,
+                    "result": sandbox_result.result,
+                    "state": sandbox_result.metadata.get("state", {}),
+                    "iterations": sandbox_result.metadata.get("iterations", 0),
+                    "tools_used": sandbox_result.metadata.get("tools_used", []),
+                }
+            else:
+                result = {
+                    "success": False,
+                    "error": sandbox_result.error,
+                }
+                if sandbox_result.traceback and verbose:
+                    console.print(f"[dim]{sandbox_result.traceback}[/dim]")
+        else:
+            # Execute directly (non-sandboxed)
+            result = asyncio.run(runtime.execute(source_content, context, format=file_format))
+
+        if result["success"]:
+            console.print("\n[green]✓ Procedure completed successfully[/green]\n")
+
+            # Display results
+            if result.get("result"):
+                console.print("\n[green]Result:[/green]")
+                display_result = result["result"]
+                try:
+                    from tactus.protocols.result import TactusResult
+
+                    if isinstance(display_result, TactusResult):
+                        display_result = display_result.output
+                except Exception:
+                    pass
+
+                console.print(f" {display_result}")
+
+            # Display state
+            if result.get("state"):
+                state_table = Table(title="Final State")
+                state_table.add_column("Key", style="cyan")
+                state_table.add_column("Value", style="magenta")
+
+                for key, value in result["state"].items():
+                    state_table.add_row(key, str(value))
+
+                console.print(state_table)
+
+            # Display stats
+            console.print(f"\n[dim]Iterations: {result.get('iterations', 0)}[/dim]")
+            console.print(
+                f"[dim]Tools used: {', '.join(result.get('tools_used', [])) or 'None'}[/dim]"
+            )
+
+        else:
+            console.print("\n[red]✗ Workflow failed[/red]\n")
+            if result.get("error"):
+                console.print(f"[red]Error: {result['error']}[/red]")
+            raise typer.Exit(1)
+
+    except Exception as e:
+        console.print(f"\n[red]✗ Execution error: {e}[/red]")
+        if verbose:
+            console.print_exception()
+        raise typer.Exit(1)
+
+
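The sandbox branch above reduces to a three-way decision once a SandboxConfig is built. A sketch using only the calls shown in the command (the kwarg values are hypothetical; they mirror --no-sandbox with the tcp broker defaults):

    from tactus.sandbox import SandboxConfig, is_docker_available

    docker_available, reason = is_docker_available()
    config = SandboxConfig(enabled=False, broker_transport="tcp", network="bridge")

    if not config.should_use_sandbox(docker_available):
        if config.is_explicitly_disabled():
            print("sandbox disabled explicitly; running without isolation")
        elif not docker_available and not config.should_error_if_unavailable():
            print(f"auto fallback, Docker unavailable: {reason}")
        elif config.should_error_if_unavailable() and not docker_available:
            raise SystemExit(1)  # sandbox required but Docker is missing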
+# Sandbox subcommand group
+sandbox_app = typer.Typer(help="Manage Docker sandbox for secure procedure execution")
+app.add_typer(sandbox_app, name="sandbox")
+
+
+@sandbox_app.command("status")
+def sandbox_status():
+    """
+    Show Docker sandbox status and availability.
+
+    Displays whether Docker is available and if the sandbox image exists.
+    """
+    from tactus.sandbox import is_docker_available, DockerManager
+
+    # Check Docker availability
+    available, reason = is_docker_available()
+
+    console.print("\n[bold]Docker Sandbox Status[/bold]\n")
+
+    if available:
+        console.print("[green]Docker:[/green] Available")
+    else:
+        console.print(f"[red]Docker:[/red] Not available - {reason}")
+
+    # Check image status
+    manager = DockerManager()
+    if manager.image_exists():
+        version = manager.get_image_version() or "unknown"
+        console.print(
+            f"[green]Sandbox image:[/green] {manager.full_image_name} (version: {version})"
+        )
+    else:
+        console.print(f"[yellow]Sandbox image:[/yellow] Not built ({manager.full_image_name})")
+        console.print("[dim]Run 'tactus sandbox rebuild' to build the image[/dim]")
+
+    console.print()
+
+
+@sandbox_app.command("rebuild")
+def sandbox_rebuild(
+    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show build output"),
+    force: bool = typer.Option(False, "--force", "-f", help="Force rebuild even if image exists"),
+):
+    """
+    Build or rebuild the Docker sandbox image.
+
+    Creates the sandbox image used for isolated procedure execution.
+    """
+    from pathlib import Path
+    from tactus.sandbox import is_docker_available, DockerManager
+    import tactus
+
+    # Check Docker availability
+    available, reason = is_docker_available()
+    if not available:
+        console.print(f"[red]Error:[/red] Docker not available - {reason}")
+        raise typer.Exit(1)
+
+    # Get Tactus package path for build context
+    tactus_path = Path(tactus.__file__).parent.parent
+    dockerfile_path = tactus_path / "tactus" / "docker" / "Dockerfile"
+
+    if not dockerfile_path.exists():
+        console.print(f"[red]Error:[/red] Dockerfile not found: {dockerfile_path}")
+        console.print("[dim]This may indicate an incomplete installation.[/dim]")
+        raise typer.Exit(1)
+
+    # Get version
+    version = getattr(tactus, "__version__", "dev")
+
+    manager = DockerManager()
+
+    if not force and manager.image_exists():
+        image_version = manager.get_image_version()
+        if image_version == version:
+            console.print(
+                f"[green]Image is up to date:[/green] {manager.full_image_name} (v{version})"
+            )
+            console.print("[dim]Use --force to rebuild anyway[/dim]")
+            return
+
+    console.print(f"[blue]Building sandbox image:[/blue] {manager.full_image_name}")
+    console.print(f"[dim]Version: {version}[/dim]")
+    console.print(f"[dim]Context: {tactus_path}[/dim]\n")
+
+    success, message = manager.build_image(
+        dockerfile_path=dockerfile_path,
+        context_path=tactus_path,
+        version=version,
+        verbose=verbose,
+    )
+
+    if success:
+        console.print("\n[green]Successfully built sandbox image[/green]")
+    else:
+        console.print(f"\n[red]Failed to build sandbox image:[/red] {message}")
+        raise typer.Exit(1)
+
+
+@app.command()
+def validate(
+    workflow_file: Path = typer.Argument(..., help="Path to workflow file (.tac or .lua)"),
+    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
+    quick: bool = typer.Option(False, "--quick", help="Quick validation (syntax only)"),
+):
+    """
+    Validate a Tactus workflow file.
+
+    Examples:
+
+        tactus validate workflow.tac
+        tactus validate workflow.lua --quick
+    """
+    setup_logging(verbose)
+
+    # Check if file exists
+    if not workflow_file.exists():
+        console.print(f"[red]Error:[/red] Workflow file not found: {workflow_file}")
+        raise typer.Exit(1)
+
+    # Determine format based on extension
+    file_format = "lua" if workflow_file.suffix in [".tac", ".lua"] else "yaml"
+
+    # Read workflow file
+    source_content = workflow_file.read_text()
+
+    console.print(f"Validating: [bold]{workflow_file.name}[/bold] ({file_format} format)")
+
+    try:
+        if file_format == "lua":
+            # Use new validator for Lua DSL
+            validator = TactusValidator()
+            mode = ValidationMode.QUICK if quick else ValidationMode.FULL
+            result = validator.validate(source_content, mode)
+
+            if result.valid:
+                console.print("\n[green]✓ DSL is valid[/green]\n")
+
+                # Display warnings
+                if result.warnings:
+                    for warning in result.warnings:
+                        console.print(f"[yellow]⚠ Warning:[/yellow] {warning.message}")
+                    console.print()
+
+                if result.registry:
+                    # Convert registry to config dict for display
+                    config = {
+                        "description": result.registry.description,
+                        "agents": {},
+                        "output": {},
+                        "params": {},
+                    }
+                    # Convert Pydantic models to dicts
+                    for name, agent in result.registry.agents.items():
+                        config["agents"][name] = {
+                            "system_prompt": agent.system_prompt,
+                            "provider": agent.provider,
+                            "model": agent.model,
+                        }
+                    for name, output in result.registry.output_schema.items():
+                        if output is not None:
+                            config["output"][name] = {
+                                "type": (
+                                    output.get("type", "string")
+                                    if isinstance(output, dict)
+                                    else "string"
+                                ),
+                                "required": (
+                                    output.get("required", False)
+                                    if isinstance(output, dict)
+                                    else False
+                                ),
+                            }
+                    for name, param in result.registry.input_schema.items():
+                        if param is not None:
+                            config["params"][name] = {
+                                "type": (
+                                    param.get("type", "string")
+                                    if isinstance(param, dict)
+                                    else "string"
+                                ),
+                                "required": (
+                                    param.get("required", False)
+                                    if isinstance(param, dict)
+                                    else False
+                                ),
+                                "default": (
+                                    param.get("default") if isinstance(param, dict) else None
+                                ),
+                            }
+                else:
+                    config = {}
+            else:
+                console.print("\n[red]✗ DSL validation failed[/red]\n")
+                for error in result.errors:
+                    console.print(f"[red] • {error.message}[/red]")
+                raise typer.Exit(1)
+        else:
+            # Parse YAML (legacy)
+            config = ProcedureYAMLParser.parse(source_content)
+
+            # Display validation results
+            console.print("\n[green]✓ YAML is valid[/green]\n")
+
+        # Show config details
+        info_table = Table(title="Workflow Info")
+        info_table.add_column("Property", style="cyan")
+        info_table.add_column("Value", style="magenta")
+
+        info_table.add_row("Name", config.get("name", "N/A"))
+        info_table.add_row("Version", config.get("version", "N/A"))
+        info_table.add_row("Class", config.get("class", "LuaDSL"))
+
+        if config.get("description"):
+            info_table.add_row("Description", config["description"])
+
+        console.print(info_table)
+
+        # Show agents
+        if config.get("agents"):
+            agents_table = Table(title="Agents")
+            agents_table.add_column("Name", style="cyan")
+            agents_table.add_column("System Prompt", style="magenta")
+
+            for name, agent_config in config["agents"].items():
+                prompt = agent_config.get("system_prompt", "N/A")
+                # Truncate long prompts
+                if len(prompt) > 50:
+                    prompt = prompt[:47] + "..."
+                agents_table.add_row(name, prompt)
+
+            console.print(agents_table)
+
+        # Show outputs
+        if config.get("output"):
+            outputs_table = Table(title="Outputs")
+            outputs_table.add_column("Name", style="cyan")
+            outputs_table.add_column("Type", style="magenta")
+            outputs_table.add_column("Required", style="yellow")
+
+            for name, output_config in config["output"].items():
+                outputs_table.add_row(
+                    name,
+                    output_config.get("type", "any"),
+                    "✓" if output_config.get("required", False) else "",
+                )
+
+            console.print(outputs_table)
+
+        # Show parameters
+        if config.get("params"):
+            params_table = Table(title="Parameters")
+            params_table.add_column("Name", style="cyan")
+            params_table.add_column("Type", style="magenta")
+            params_table.add_column("Default", style="yellow")
+
+            for name, param_config in config["params"].items():
+                params_table.add_row(
+                    name, param_config.get("type", "any"), str(param_config.get("default", ""))
+                )
+
+            console.print(params_table)
+
+        console.print("\n[green]Validation complete![/green]")
+
+    except ProcedureConfigError as e:
+        console.print("\n[red]✗ Validation failed:[/red]\n")
+        console.print(f"[red]{e}[/red]")
+        raise typer.Exit(1)
+
+    except Exception as e:
+        console.print("\n[red]✗ Unexpected error:[/red]\n")
+        console.print(f"[red]{e}[/red]")
+        if verbose:
+            console.print_exception()
+        raise typer.Exit(1)
+
+
+@app.command("format")
+def format_(
+    workflow_file: Path = typer.Argument(..., help="Path to workflow file (.tac or .lua)"),
+    check: bool = typer.Option(
+        False,
+        "--check",
+        help="Don't write files back; exit 1 if changes are needed",
+    ),
+    stdout: bool = typer.Option(False, "--stdout", help="Write formatted code to stdout"),
+):
+    """
+    Format a Tactus Lua DSL file.
+
+    Currently enforces semantic indentation using 2-space soft tabs.
+    """
+    if not workflow_file.exists():
+        console.print(f"[red]Error:[/red] Workflow file not found: {workflow_file}")
+        raise typer.Exit(1)
+
+    if workflow_file.suffix not in [".tac", ".lua"]:
+        console.print("[red]Error:[/red] Formatting is only supported for .tac/.lua files")
+        raise typer.Exit(1)
+
+    formatter = TactusFormatter(indent_width=2)
+    source_content = workflow_file.read_text()
+
+    try:
+        result = formatter.format_source(source_content)
+    except FormattingError as e:
+        console.print(f"[red]✗[/red] {e}")
+        raise typer.Exit(1)
+
+    if stdout:
+        sys.stdout.write(result.formatted)
+        return
+
+    if check:
+        if result.changed:
+            console.print(f"[red]✗ Would reformat:[/red] {workflow_file}")
+            raise typer.Exit(1)
+        console.print(f"[green]✓ Already formatted:[/green] {workflow_file}")
+        return
+
+    if result.changed:
+        workflow_file.write_text(result.formatted)
+        console.print(f"[green]✓ Formatted:[/green] {workflow_file}")
+    else:
+        console.print(f"[green]✓ No changes:[/green] {workflow_file}")
+
+
+@app.command()
+def info(
+    workflow_file: Path = typer.Argument(..., help="Path to workflow file (.tac or .lua)"),
+):
+    """
+    Display procedure metadata (agents, tools, parameters, outputs).
+
+    Examples:
+
+        tactus info workflow.tac
+    """
+    # Check if file exists
+    if not workflow_file.exists():
+        console.print(f"[red]Error:[/red] Workflow file not found: {workflow_file}")
+        raise typer.Exit(1)
+
+    # Determine format based on extension
+    file_format = "lua" if workflow_file.suffix in [".tac", ".lua"] else "yaml"
+
+    # Read workflow file
+    source_content = workflow_file.read_text()
+
+    console.print(f"[blue]Procedure info:[/blue] [bold]{workflow_file.name}[/bold]\n")
+
+    try:
+        if file_format == "lua":
+            # Use validator to parse procedure
+            validator = TactusValidator()
+            result = validator.validate(source_content, ValidationMode.FULL)
+
+            if not result.valid:
+                console.print("[red]✗ Invalid procedure - cannot display info[/red]\n")
+                for error in result.errors:
+                    console.print(f" [red]•[/red] {error.message}")
+                raise typer.Exit(1)
+
+            registry = result.registry
+
+            # Display procedure name
+            if registry.description:
+                console.print(f"[cyan]Description:[/cyan] {registry.description}\n")
+
+            # Show parameters (input)
+            if registry.input_schema:
+                console.print("[cyan]Parameters:[/cyan]")
+                for name, field_config in registry.input_schema.items():
+                    if field_config is None:
+                        # Handle None field_config
+                        console.print(f" [bold]{name}[/bold]: any")
+                    elif isinstance(field_config, dict):
+                        field_type = field_config.get("type", "any")
+                        required = field_config.get("required", False)
+                        default = field_config.get("default")
+                        req_str = "[yellow](required)[/yellow]" if required else ""
+                        default_str = (
+                            f" [dim]default: {default}[/dim]" if default is not None else ""
+                        )
+                        console.print(f" [bold]{name}[/bold]: {field_type} {req_str}{default_str}")
+                    else:
+                        # Handle other types
+                        console.print(f" [bold]{name}[/bold]: {type(field_config).__name__}")
+                console.print()
+
+            # Show outputs
+            if registry.output_schema:
+                console.print("[cyan]Outputs:[/cyan]")
+                for name, field_config in registry.output_schema.items():
+                    if field_config is None:
+                        # Handle None field_config
+                        console.print(f" [bold]{name}[/bold]: any")
+                    elif isinstance(field_config, dict):
+                        field_type = field_config.get("type", "any")
+                        required = field_config.get("required", False)
+                        description = field_config.get("description", "")
+                        req_str = "[yellow](required)[/yellow]" if required else ""
+                        desc_str = f" [dim]- {description}[/dim]" if description else ""
+                        console.print(f" [bold]{name}[/bold]: {field_type} {req_str}{desc_str}")
+                    else:
+                        # Handle other types (shouldn't happen, but be safe)
+                        console.print(f" [bold]{name}[/bold]: {type(field_config).__name__}")
+                console.print()
+
+            # Show agents
+            if registry.agents:
+                console.print("[cyan]Agents:[/cyan]")
+                for name, agent_def in registry.agents.items():
+                    console.print(f" [bold]{name}[/bold]:")
+                    console.print(f" Provider: {agent_def.provider}")
+                    if agent_def.model:
+                        if isinstance(agent_def.model, str):
+                            model_str = agent_def.model
+                        elif isinstance(agent_def.model, dict):
+                            model_str = agent_def.model.get("name", "default")
+                        else:
+                            model_str = str(agent_def.model)
+                        console.print(f" Model: {model_str}")
+                    if agent_def.tools:
+                        tools_str = ", ".join(agent_def.tools)
+                        console.print(f" Tools: {tools_str}")
+                    if agent_def.system_prompt:
+                        # Show first 100 chars of system prompt
+                        prompt_preview = (
+                            agent_def.system_prompt[:100] + "..."
+                            if len(agent_def.system_prompt) > 100
+                            else agent_def.system_prompt
+                        )
+                        console.print(f" Prompt: [dim]{prompt_preview}[/dim]")
+                console.print()
+
+            # Show specifications
+            if registry.specifications:
+                console.print(
+                    f"[cyan]Specifications:[/cyan] {len(registry.specifications)} scenario(s)"
+                )
+
+        else:
+            console.print("[red]Only .tac/.lua files are supported for info command[/red]")
+            raise typer.Exit(1)
+
+    except Exception as e:
+        console.print(f"\n[red]✗ Error displaying info:[/red] {e}")
+        raise typer.Exit(1)
+
+
1304
+ @app.command()
1305
+ def test(
1306
+ procedure_file: Path = typer.Argument(..., help="Path to procedure file (.tac or .lua)"),
1307
+ runs: int = typer.Option(1, help="Number of runs per scenario (for consistency check)"),
1308
+ scenario: Optional[str] = typer.Option(None, help="Run specific scenario"),
1309
+ parallel: bool = typer.Option(True, help="Run scenarios in parallel"),
1310
+ workers: Optional[int] = typer.Option(None, help="Number of parallel workers"),
1311
+ mock: bool = typer.Option(False, help="Use mocked tools (fast, deterministic)"),
1312
+ mock_config: Optional[Path] = typer.Option(None, help="Path to mock config JSON"),
1313
+ param: Optional[list[str]] = typer.Option(None, help="Parameters in format key=value"),
1314
+ verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
1315
+ ):
1316
+ """
1317
+ Run BDD specifications for a procedure.
1318
+
1319
+ Can run scenarios once (standard test) or multiple times (consistency evaluation).
1320
+
1321
+ Examples:
1322
+
1323
+ # Run all scenarios once
1324
+ tactus test procedure.tac
1325
+
1326
+ # Check consistency (run 10 times per scenario)
1327
+ tactus test procedure.tac --runs 10
1328
+
1329
+ # Run with mocked tools
1330
+ tactus test procedure.tac --mock
1331
+
1332
+ # Run specific scenario
1333
+ tactus test procedure.tac --scenario "Agent completes research"
1334
+ """
1335
+ setup_logging(verbose)
1336
+
1337
+ if not procedure_file.exists():
1338
+ console.print(f"[red]Error:[/red] File not found: {procedure_file}")
1339
+ raise typer.Exit(1)
1340
+
1341
+ mode_str = "mocked" if (mock or mock_config) else "real"
1342
+ if runs > 1:
1343
+ console.print(
1344
+ Panel(f"Running Consistency Check ({runs} runs, {mode_str} mode)", style="blue")
1345
+ )
1346
+ else:
1347
+ console.print(Panel(f"Running BDD Tests ({mode_str} mode)", style="blue"))
1348
+
1349
+ try:
1350
+ from tactus.testing.test_runner import TactusTestRunner
1351
+ from tactus.testing.evaluation_runner import TactusEvaluationRunner
1352
+ from tactus.testing.mock_tools import create_default_mocks
1353
+ from tactus.validation import TactusValidator
1354
+ from tactus.core.config_manager import ConfigManager
1355
+ import json
1356
+
1357
+ # Load configuration and export all values as environment variables
1358
+ config_mgr = ConfigManager()
1359
+ config = config_mgr.load_cascade(procedure_file)
1360
+
1361
+ # Export config values as environment variables (matching ConfigManager's env_mappings)
1362
+ env_mappings = {
1363
+ "openai_api_key": "OPENAI_API_KEY",
1364
+ "google_api_key": "GOOGLE_API_KEY",
1365
+ ("aws", "access_key_id"): "AWS_ACCESS_KEY_ID",
1366
+ ("aws", "secret_access_key"): "AWS_SECRET_ACCESS_KEY",
1367
+ ("aws", "default_region"): "AWS_DEFAULT_REGION",
1368
+ ("aws", "profile"): "AWS_PROFILE",
1369
+ }
1370
+
1371
+ for config_key, env_key in env_mappings.items():
1372
+ # Skip if environment variable is already set
1373
+ if env_key in os.environ:
1374
+ continue
1375
+
1376
+ # Get value from config
1377
+ if isinstance(config_key, tuple):
1378
+ # Nested key (e.g., aws.access_key_id)
1379
+ value = config.get(config_key[0], {}).get(config_key[1])
1380
+ else:
1381
+ value = config.get(config_key)
1382
+
1383
+ # Set environment variable if value exists
1384
+ if value:
1385
+ os.environ[env_key] = str(value)
1386
+
1387
+        # Validate and extract specifications
+        validator = TactusValidator()
+        result = validator.validate_file(str(procedure_file))
+
+        if not result.valid:
+            console.print("[red]✗ Validation failed:[/red]")
+            for error in result.errors:
+                console.print(f"  [red]• {error.message}[/red]")
+            raise typer.Exit(1)
+
+        # Check if specifications exist
+        if not result.registry or not result.registry.gherkin_specifications:
+            console.print("[yellow]⚠ No specifications found in procedure file[/yellow]")
+            console.print("Add specifications using: specifications([[ ... ]])")
+            raise typer.Exit(1)
+
+        # Load mock config if provided
+        mock_tools = {}
+        if mock or mock_config:
+            if mock_config:
+                mock_tools = json.loads(mock_config.read_text())
+                console.print(f"[cyan]Loaded mock config: {mock_config}[/cyan]")
+            else:
+                mock_tools = create_default_mocks()
+                console.print("[cyan]Using default mocks[/cyan]")
+
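+        # A mock config file is plain JSON mapping tool names to canned results.
+        # Illustrative shape only; the exact schema is defined by
+        # tactus.testing.mock_tools, and the tool name and payload here are
+        # hypothetical:
+        #   {"search": {"results": ["stubbed hit 1", "stubbed hit 2"]}}
+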
+        # Parse parameters
+        test_params = {}
+        if param:
+            for p in param:
+                if "=" in p:
+                    key, value = p.split("=", 1)
+                    test_params[key] = value
+
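+        # For example, `--param topic=ai --param depth=2` yields
+        # {"topic": "ai", "depth": "2"}: values stay strings, and split("=", 1)
+        # preserves any further "=" characters inside the value.
+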
+        if runs > 1:
+            # Run consistency evaluation
+            evaluator = TactusEvaluationRunner(
+                procedure_file, mock_tools=mock_tools, params=test_params
+            )
+            evaluator.setup(result.registry.gherkin_specifications)
+
+            if scenario:
+                eval_results = [evaluator.evaluate_scenario(scenario, runs, parallel)]
+            else:
+                eval_results = evaluator.evaluate_all(runs, parallel)
+
+            _display_evaluation_results(eval_results)
+            evaluator.cleanup()
+
+        else:
+            # Run standard test
+            runner = TactusTestRunner(procedure_file, mock_tools=mock_tools, params=test_params)
+            runner.setup(result.registry.gherkin_specifications)
+
+            test_result = runner.run_tests(parallel=parallel, scenario_filter=scenario)
+
+            _display_test_results(test_result)
+            runner.cleanup()
+
+            if test_result.failed_scenarios > 0:
+                raise typer.Exit(1)
+
+    except Exception as e:
+        console.print(f"[red]✗ Error:[/red] {e}")
+        if verbose:
+            console.print_exception()
+        raise typer.Exit(1)
+
+
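+# The display helpers below duck-type their arguments. Judging from the
+# attribute accesses, a test result is assumed to expose features (each with
+# a name and scenarios), per-scenario status/duration/total_cost/llm_calls/
+# iterations/tools_used/steps, and aggregate totals such as total_scenarios
+# and passed_scenarios. This summary is inferred from usage here, not from
+# the tactus.testing models themselves.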
+def _display_test_results(test_result):
+    """Display test results in Rich format."""
+
+    for feature in test_result.features:
+        console.print(f"\n[bold]Feature:[/bold] {feature.name}")
+
+        for scenario in feature.scenarios:
+            status_icon = "✓" if scenario.status == "passed" else "✗"
+            status_color = "green" if scenario.status == "passed" else "red"
+
+            # Include execution metrics in scenario display
+            metrics_parts = []
+            if scenario.total_cost > 0:
+                metrics_parts.append(f"💰 ${scenario.total_cost:.6f}")
+            if scenario.llm_calls > 0:
+                metrics_parts.append(f"🤖 {scenario.llm_calls} LLM calls")
+            if scenario.iterations > 0:
+                metrics_parts.append(f"🔄 {scenario.iterations} iterations")
+            if scenario.tools_used:
+                metrics_parts.append(f"🔧 {len(scenario.tools_used)} tools")
+
+            metrics_str = f" ({', '.join(metrics_parts)})" if metrics_parts else ""
+            console.print(
+                f"  [{status_color}]{status_icon}[/{status_color}] "
+                f"Scenario: {scenario.name} ({scenario.duration:.2f}s){metrics_str}"
+            )
+
+            if scenario.status == "failed":
+                for step in scenario.steps:
+                    if step.status == "failed":
+                        console.print(f"    [red]Failed:[/red] {step.keyword} {step.message}")
+                        if step.error_message:
+                            console.print(f"      {step.error_message}")
+
+    # Summary
+    console.print(
+        f"\n{test_result.total_scenarios} scenarios "
+        f"([green]{test_result.passed_scenarios} passed[/green], "
+        f"[red]{test_result.failed_scenarios} failed[/red])"
+    )
+
+    # Execution metrics summary
+    if test_result.total_cost > 0 or test_result.total_llm_calls > 0:
+        console.print("\n[bold]Execution Metrics:[/bold]")
+        if test_result.total_cost > 0:
+            console.print(
+                f"  💰 Cost: ${test_result.total_cost:.6f} ({test_result.total_tokens:,} tokens)"
+            )
+        if test_result.total_llm_calls > 0:
+            console.print(f"  🤖 LLM Calls: {test_result.total_llm_calls}")
+        if test_result.total_iterations > 0:
+            console.print(f"  🔄 Iterations: {test_result.total_iterations}")
+        if test_result.unique_tools_used:
+            console.print(f"  🔧 Tools: {', '.join(test_result.unique_tools_used)}")
+
+
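+# Example reading (illustrative numbers): with 8 of 10 runs passing, the
+# function below prints "Success Rate: 80.0% (8/10)" in yellow, since 0.8
+# falls short of the 0.9 cutoff used for green.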
+def _display_evaluation_results(eval_results):
+    """Display evaluation results with metrics."""
+
+    for eval_result in eval_results:
+        console.print(f"\n[bold]Scenario:[/bold] {eval_result.scenario_name}")
+
+        # Success rate
+        rate_color = "green" if eval_result.success_rate >= 0.9 else "yellow"
+        console.print(
+            f"  Success Rate: [{rate_color}]{eval_result.success_rate:.1%}[/{rate_color}] "
+            f"({eval_result.passed_runs}/{eval_result.total_runs})"
+        )
+
+        # Timing
+        console.print(
+            f"  Duration: {eval_result.mean_duration:.2f}s (±{eval_result.stddev_duration:.2f}s)"
+        )
+
+        # Consistency
+        consistency_color = "green" if eval_result.consistency_score >= 0.9 else "yellow"
+        console.print(
+            f"  Consistency: [{consistency_color}]{eval_result.consistency_score:.1%}[/{consistency_color}]"
+        )
+
+        # Flakiness warning
+        if eval_result.is_flaky:
+            console.print("  [yellow]⚠️ FLAKY - Inconsistent results detected[/yellow]")
+
+
+def _display_eval_results(report, runs: int, console):
+    """Display evaluation results with per-task success rate breakdown."""
+    from collections import defaultdict
+    from rich.panel import Panel
+    from rich import box
+
+    # Group results by original case name
+    case_results = defaultdict(list)
+    for case in report.cases:
+        # Extract original case name from the case name (e.g., "simple_greeting_run1" -> "simple_greeting")
+        case_name = case.name
+        if "_run" in case_name:
+            original_name = case_name.rsplit("_run", 1)[0]
+        else:
+            original_name = case_name
+        case_results[original_name].append(case)
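+    # rsplit("_run", 1) splits on the *last* occurrence, so a case named
+    # "multi_run_check_run3" groups under "multi_run_check" rather than "multi".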
+
+    # Display per-task breakdown with details
+    if runs > 1:
+        console.print("\n[bold cyan]Evaluation Results by Task[/bold cyan]\n")
+
+        for task_name, cases in sorted(case_results.items()):
+            total_runs = len(cases)
+            # A case is successful if ALL its assertions passed
+            successful_runs = sum(1 for c in cases if all(a.value for a in c.assertions.values()))
+            success_rate = (successful_runs / total_runs * 100) if total_runs > 0 else 0
+
+            # Calculate per-evaluator pass rates
+            evaluator_stats = defaultdict(lambda: {"passed": 0, "total": 0})
+            for case in cases:
+                for eval_name, assertion in case.assertions.items():
+                    evaluator_stats[eval_name]["total"] += 1
+                    if assertion.value:
+                        evaluator_stats[eval_name]["passed"] += 1
+
+            # Status styling
+            status_icon = "✔" if success_rate >= 80 else "⚠" if success_rate >= 50 else "✗"
+            rate_color = (
+                "green" if success_rate >= 80 else "yellow" if success_rate >= 50 else "red"
+            )
+
+            # Create task summary
+            summary = f"[bold]{task_name}[/bold]\n"
+            summary += f"[{rate_color}]{status_icon} Success Rate: {success_rate:.1f}% ({successful_runs}/{total_runs} runs passed all evaluators)[/{rate_color}]\n"
+
+            # Add evaluator breakdown
+            summary += "\n[dim]Evaluator Breakdown:[/dim]\n"
+            for eval_name, stats in evaluator_stats.items():
+                eval_rate = (stats["passed"] / stats["total"] * 100) if stats["total"] > 0 else 0
+                eval_color = "green" if eval_rate >= 80 else "yellow" if eval_rate >= 50 else "red"
+                summary += f"  [{eval_color}]{eval_name}: {eval_rate:.0f}% ({stats['passed']}/{stats['total']})[/{eval_color}]\n"
+
+            # Show detailed sample runs
+            summary += "\n[dim]Sample Runs (showing first 3):[/dim]"
+            for i, case in enumerate(cases[:3], 1):  # Show first 3 runs
+                all_passed = all(a.value for a in case.assertions.values())
+                icon = "✔" if all_passed else "✗"
+                summary += f"\n\n  {icon} [bold]Run {i}:[/bold]"
+
+                # Show input
+                summary += f"\n    [dim]Input:[/dim] {case.inputs}"
+
+                # Show output (formatted nicely)
+                summary += "\n    [dim]Output:[/dim]"
+                if isinstance(case.output, dict):
+                    for key, value in case.output.items():
+                        value_str = str(value)
+                        if len(value_str) > 200:
+                            value_str = value_str[:197] + "..."
+                        summary += f"\n      {key}: {value_str}"
+                else:
+                    output_str = str(case.output)
+                    if len(output_str) > 200:
+                        output_str = output_str[:197] + "..."
+                    summary += f" {output_str}"
+
+                # Show assertion results for this run
+                summary += "\n    [dim]Evaluators:[/dim]"
+                for eval_name, assertion in case.assertions.items():
+                    result_icon = "✔" if assertion.value else "✗"
+                    summary += f"\n      {result_icon} {eval_name}"
+                    # Show reason if available (e.g., from LLM judge)
+                    if hasattr(assertion, "reason") and assertion.reason:
+                        reason_lines = assertion.reason.split("\n")
+                        # Show first line inline, rest indented
+                        if reason_lines:
+                            summary += f": {reason_lines[0]}"
+                            for line in reason_lines[1:3]:  # Show up to 2 more lines
+                                if line.strip():
+                                    summary += f"\n        {line.strip()}"
+                            if len(reason_lines) > 3:
+                                summary += "\n        [dim]...[/dim]"
+
+            if len(cases) > 3:
+                summary += f"\n\n  [dim]... and {len(cases) - 3} more runs (use --verbose to see all)[/dim]"
+
+            console.print(Panel(summary, box=box.ROUNDED, border_style=rate_color))
+            console.print()
+    else:
+        # Single run - just show the standard report
+        console.print("\n[bold]Detailed Results:[/bold]")
+        report.print(include_input=True, include_output=True)
+
+
+@app.command()
+def eval(
+    procedure_file: Path = typer.Argument(..., help="Path to procedure file (.tac)"),
+    runs: int = typer.Option(1, help="Number of runs per case"),
+    parallel: bool = typer.Option(True, help="Run cases in parallel"),
+    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
+):
+    """
+    Run Pydantic Evals evaluation on a procedure.
+
+    Evaluates LLM agent quality, consistency, and performance using
+    the Pydantic Evals framework. Requires an evaluations() block in
+    the procedure file.
+
+    Examples:
+
+        # Run evaluation once per case
+        tactus eval procedure.tac
+
+        # Run evaluation 10 times per case to measure consistency
+        tactus eval procedure.tac --runs 10
+
+        # Run sequentially (for debugging)
+        tactus eval procedure.tac --no-parallel
+    """
+    setup_logging(verbose)
+    load_tactus_config()
+
+    if not procedure_file.exists():
+        console.print(f"[red]Error:[/red] File not found: {procedure_file}")
+        raise typer.Exit(1)
+
+    try:
+        from tactus.testing.pydantic_eval_runner import TactusPydanticEvalRunner
+        from tactus.testing.eval_models import EvaluationConfig, EvalCase, EvaluatorConfig
+        from tactus.validation import TactusValidator
+
+        # Validate and extract evaluations config
+        validator = TactusValidator()
+        result = validator.validate_file(str(procedure_file))
+
+        if not result.valid:
+            console.print("[red]✗ Validation failed:[/red]")
+            for error in result.errors:
+                console.print(f"  [red]• {error.message}[/red]")
+            raise typer.Exit(1)
+
+        # Check if evaluations exist
+        if not result.registry or not result.registry.pydantic_evaluations:
+            console.print("[yellow]⚠ No evaluations found in procedure file[/yellow]")
+            console.print(
+                "Add evaluations using: evaluations({ dataset = {...}, evaluators = {...} })"
+            )
+            raise typer.Exit(1)
+
+        # Convert registry evaluations to EvaluationConfig
+        eval_dict = result.registry.pydantic_evaluations
+
+        # Parse dataset
+        dataset_cases = []
+        for case_dict in eval_dict.get("dataset", []):
+            dataset_cases.append(EvalCase(**case_dict))
+
+        # Parse evaluators
+        evaluators = []
+        for eval_dict_item in eval_dict.get("evaluators", []):
+            evaluators.append(EvaluatorConfig(**eval_dict_item))
+
+        # Parse thresholds if present
+        thresholds = None
+        if "thresholds" in eval_dict:
+            from tactus.testing.eval_models import EvaluationThresholds
+
+            thresholds = EvaluationThresholds(**eval_dict["thresholds"])
+
+        # Create evaluation config.
+        # Use runs from the file if specified, otherwise use the CLI parameter.
+        file_runs = eval_dict.get("runs", 1)
+        actual_runs = (
+            runs if runs != 1 else file_runs
+        )  # CLI default is 1, so if it's 1, use the file value
+
+        console.print(
+            Panel(f"Running Pydantic Evals Evaluation ({actual_runs} runs per case)", style="blue")
+        )
+
+        eval_config = EvaluationConfig(
+            dataset=dataset_cases,
+            evaluators=evaluators,
+            runs=actual_runs,
+            parallel=parallel,
+            thresholds=thresholds,
+        )
+
+        # Get OpenAI API key
+        openai_api_key = os.environ.get("OPENAI_API_KEY")
+        if not openai_api_key:
+            console.print("[yellow]⚠ Warning: OPENAI_API_KEY not set[/yellow]")
+
+        # Run evaluation
+        runner = TactusPydanticEvalRunner(
+            procedure_file=procedure_file,
+            eval_config=eval_config,
+            openai_api_key=openai_api_key,
+        )
+
+        report = runner.run_evaluation()
+
+        # Display results with custom formatting for success rates
+        console.print("\n")
+        _display_eval_results(report, actual_runs, console)
+
+        # Check thresholds
+        passed, violations = runner.check_thresholds(report)
+
+        if not passed:
+            console.print("\n[red]❌ Evaluation failed threshold checks:[/red]")
+            for violation in violations:
+                console.print(f"  • {violation}")
+            raise typer.Exit(code=1)
+        elif eval_config.thresholds:
+            # Only show success message if thresholds were configured
+            console.print("\n[green]✓ All thresholds met[/green]")
+
+    except ImportError as e:
+        console.print(f"[red]✗ Error:[/red] {e}")
+        console.print("\n[yellow]Install pydantic-evals:[/yellow]")
+        console.print("  pip install pydantic-evals")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]✗ Error:[/red] {e}")
+        if verbose:
+            console.print_exception()
+        raise typer.Exit(1)
+
+
+def _display_pydantic_eval_results(report):
+    """Display Pydantic Evals results in Rich format."""
+
+    # Summary header
+    console.print("\n[bold]Evaluation Results:[/bold]")
+
+    # Overall stats
+    total_cases = len(report.cases) if hasattr(report, "cases") else 0
+    if total_cases == 0:
+        console.print("[yellow]No cases found in report[/yellow]")
+        return
+
+    # Assertions are result objects; the boolean outcome lives in .value
+    # (matching how _display_eval_results reads them above).
+    passed_cases = sum(
+        1 for case in report.cases if all(a.value for a in case.assertions.values())
+    )
+
+    console.print(
+        f"  Cases: {total_cases} total, "
+        f"[green]{passed_cases} passed[/green], "
+        f"[red]{total_cases - passed_cases} failed[/red]"
+    )
+
+    # Per-case results
+    for case in report.cases:
+        console.print(f"\n[bold cyan]Case:[/bold cyan] {case.name}")
+
+        # Assertions (pass/fail evaluators)
+        if case.assertions:
+            console.print("  [bold]Assertions:[/bold]")
+            for name, assertion in case.assertions.items():
+                passed = assertion.value
+                icon = "✓" if passed else "✗"
+                color = "green" if passed else "red"
+                console.print(f"    [{color}]{icon}[/{color}] {name}")
+
+        # Scores (numeric evaluators like LLM judge)
+        if case.scores:
+            console.print("  [bold]Scores:[/bold]")
+            for name, score in case.scores.items():
+                console.print(f"    {name}: {score:.2f}")
+
+        # Labels (categorical evaluators)
+        if case.labels:
+            console.print("  [bold]Labels:[/bold]")
+            for name, label in case.labels.items():
+                console.print(f"    {name}: {label}")
+
+        # Duration
+        console.print(f"  Duration: {case.task_duration:.2f}s")
+
+    # Averages
+    if report.cases:
+        console.print("\n[bold]Averages:[/bold]")
+
+        # Average scores
+        all_scores = {}
+        for case in report.cases:
+            for name, score in case.scores.items():
+                if name not in all_scores:
+                    all_scores[name] = []
+                all_scores[name].append(score)
+
+        for name, scores in all_scores.items():
+            avg_score = sum(scores) / len(scores)
+            console.print(f"  {name}: {avg_score:.2f}")
+
+        # Average duration
+        avg_duration = sum(case.task_duration for case in report.cases) / len(report.cases)
+        console.print(f"  Duration: {avg_duration:.2f}s")
+
+
+@app.command()
+def version():
+    """Show Tactus version."""
+    from tactus import __version__
+
+    console.print(f"Tactus version: [bold]{__version__}[/bold]")
+
+
+@app.command()
+def ide(
+    port: Optional[int] = typer.Option(None, help="Backend port (auto-detected if not specified)"),
+    frontend_port: int = typer.Option(3000, help="Frontend port"),
+    no_browser: bool = typer.Option(False, "--no-browser", help="Don't open browser automatically"),
+    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
+):
+    """
+    Start the Tactus IDE with integrated backend and frontend.
+
+    The IDE provides a Monaco-based editor with syntax highlighting,
+    validation, and LSP features for Tactus DSL files.
+
+    Examples:
+
+        # Start IDE (auto-detects available port)
+        tactus ide
+
+        # Start on specific port
+        tactus ide --port 5001
+
+        # Start without opening browser
+        tactus ide --no-browser
+    """
+    import socket
+    import subprocess
+    import threading
+    import time
+    import webbrowser
+    from tactus.ide import create_app
+
+    setup_logging(verbose)
+
+    # Save initial working directory before any chdir operations
+    initial_workspace = os.getcwd()
+
+    console.print(Panel("[bold blue]Starting Tactus IDE[/bold blue]", style="blue"))
+
+    # Find available port for backend
+    def find_available_port(preferred_port=None):
+        """Find an available port, preferring the specified port if available."""
+        if preferred_port:
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            try:
+                sock.bind(("127.0.0.1", preferred_port))
+                sock.close()
+                return preferred_port
+            except OSError:
+                pass
+
+        # Let OS assign an available port
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.bind(("127.0.0.1", 0))
+        assigned_port = sock.getsockname()[1]
+        sock.close()
+        return assigned_port
+
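+    # The probe above binds and immediately closes the socket, so another
+    # process could grab the port before the server binds it (a classic
+    # check-then-use race); an accepted trade-off for a local development tool.
+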
+    backend_port = find_available_port(port or 5001)
+    console.print(f"Server port: [cyan]{backend_port}[/cyan]")
+
+    # Get paths - handle both development and PyInstaller frozen environments
+    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
+        # Running in PyInstaller bundle
+        bundle_dir = Path(sys._MEIPASS)
+        frontend_dir = bundle_dir / "tactus-ide" / "frontend"
+        dist_dir = frontend_dir / "dist"
+    else:
+        # Running in development
+        project_root = Path(__file__).parent.parent.parent
+        frontend_dir = project_root / "tactus-ide" / "frontend"
+        dist_dir = frontend_dir / "dist"
+
+    # Check if frontend is built
+    if not dist_dir.exists():
+        console.print("\n[yellow]Frontend not built. Building now...[/yellow]")
+
+        if not frontend_dir.exists():
+            console.print(f"[red]Error:[/red] Frontend directory not found: {frontend_dir}")
+            raise typer.Exit(1)
+
+        # Set environment variable for backend URL
+        env = os.environ.copy()
+        env["VITE_BACKEND_URL"] = f"http://localhost:{backend_port}"
+
+        try:
+            console.print("Running [cyan]npm run build[/cyan]...")
+            result = subprocess.run(
+                ["npm", "run", "build"], cwd=frontend_dir, env=env, capture_output=True, text=True
+            )
+
+            if result.returncode != 0:
+                console.print(f"[red]Build failed:[/red]\n{result.stderr}")
+                raise typer.Exit(1)
+
+            console.print("[green]✓ Frontend built successfully[/green]\n")
+        except FileNotFoundError:
+            console.print("[red]Error:[/red] npm not found. Please install Node.js and npm.")
+            raise typer.Exit(1)
+
+    # Start backend server (which also serves the frontend) in a thread
+    def run_backend():
+        app = create_app(initial_workspace=initial_workspace, frontend_dist_dir=dist_dir)
+        app.run(host="127.0.0.1", port=backend_port, debug=False, threaded=True, use_reloader=False)
+
+    backend_thread = threading.Thread(target=run_backend, daemon=True)
+    backend_thread.start()
+    console.print(f"[green]✓ Server started on http://127.0.0.1:{backend_port}[/green]")
+
+    # Wait a moment for the server to start
+    time.sleep(1)
+
+    # Open browser
+    ide_url = f"http://localhost:{backend_port}"
+    if not no_browser:
+        console.print(f"\n[cyan]Opening browser to {ide_url}[/cyan]")
+        webbrowser.open(ide_url)
+    else:
+        console.print(f"\n[cyan]IDE available at: {ide_url}[/cyan]")
+
+    console.print("\n[dim]Press Ctrl+C to stop the IDE[/dim]\n")
+
+    # Keep running until interrupted
+    try:
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        console.print("\n\n[yellow]Shutting down Tactus IDE...[/yellow]")
+        console.print("[green]✓ IDE stopped[/green]")
+
+
+ @app.command(name="trace-list")
1990
+ def trace_list(
1991
+ procedure: Optional[str] = typer.Option(None, help="Filter by procedure name"),
1992
+ status: Optional[str] = typer.Option(
1993
+ None, help="Filter by status (RUNNING, COMPLETED, FAILED)"
1994
+ ),
1995
+ limit: int = typer.Option(20, help="Maximum number of runs to display"),
1996
+ storage_path: Optional[Path] = typer.Option(None, help="Path for file storage"),
1997
+ ):
1998
+ """List execution traces."""
1999
+ from tactus.tracing import TraceManager
2000
+
2001
+ # Initialize storage
2002
+ if storage_path:
2003
+ storage = FileStorage(str(storage_path))
2004
+ else:
2005
+ storage = FileStorage()
2006
+
2007
+ trace_mgr = TraceManager(storage)
2008
+
2009
+ try:
2010
+ # Get runs
2011
+ runs = trace_mgr.list_runs(procedure_name=procedure, limit=limit)
2012
+
2013
+ # Filter by status if specified
2014
+ if status:
2015
+ runs = [r for r in runs if r.status == status]
2016
+
2017
+ if not runs:
2018
+ console.print("[yellow]No execution traces found[/yellow]")
2019
+ return
2020
+
2021
+ # Display table
2022
+ table = Table(title="Execution Traces")
2023
+ table.add_column("Run ID", style="cyan", no_wrap=True)
2024
+ table.add_column("Procedure", style="green")
2025
+ table.add_column("Status", style="yellow")
2026
+ table.add_column("Started", style="blue")
2027
+ table.add_column("Duration")
2028
+ table.add_column("Checkpoints", justify="right")
2029
+
2030
+ for run in runs:
2031
+ # Format duration
2032
+ if run.end_time:
2033
+ duration = run.end_time - run.start_time
2034
+ duration_str = f"{duration.total_seconds():.1f}s"
2035
+ else:
2036
+ duration_str = "running..."
2037
+
2038
+ # Color status
2039
+ status_color = {
2040
+ "RUNNING": "yellow",
2041
+ "COMPLETED": "green",
2042
+ "FAILED": "red",
2043
+ "PAUSED": "blue",
2044
+ }.get(run.status, "white")
2045
+
2046
+ table.add_row(
2047
+ run.run_id[:8], # Show first 8 chars of run ID
2048
+ run.procedure_name,
2049
+ f"[{status_color}]{run.status}[/{status_color}]",
2050
+ run.start_time.strftime("%Y-%m-%d %H:%M"),
2051
+ duration_str,
2052
+ str(len(run.execution_log)),
2053
+ )
2054
+
2055
+ console.print(table)
2056
+
2057
+ except Exception as e:
2058
+ console.print(f"[red]Error listing traces: {e}[/red]")
2059
+ raise typer.Exit(1)
2060
+
2061
+
2062
+ @app.command(name="trace-show")
2063
+ def trace_show(
2064
+ run_id: str = typer.Argument(..., help="Run ID to display"),
2065
+ position: Optional[int] = typer.Option(None, help="Show specific checkpoint position"),
2066
+ storage_path: Optional[Path] = typer.Option(None, help="Path for file storage"),
2067
+ ):
2068
+ """Show detailed trace information."""
2069
+ from tactus.tracing import TraceManager
2070
+ from rich.syntax import Syntax
2071
+ from rich.json import JSON
2072
+
2073
+ # Initialize storage
2074
+ if storage_path:
2075
+ storage = FileStorage(str(storage_path))
2076
+ else:
2077
+ storage = FileStorage()
2078
+
2079
+ trace_mgr = TraceManager(storage)
2080
+
2081
+ try:
2082
+ run = trace_mgr.get_run(run_id)
2083
+
2084
+ if position is not None:
2085
+ # Show specific checkpoint
2086
+ checkpoint = trace_mgr.get_checkpoint(run_id, position)
2087
+
2088
+ console.print(Panel(f"[bold]Checkpoint {position}[/bold]", style="blue"))
2089
+ console.print(f"[cyan]Type:[/cyan] {checkpoint.type}")
2090
+ console.print(f"[cyan]Timestamp:[/cyan] {checkpoint.timestamp}")
2091
+
2092
+ if checkpoint.duration_ms:
2093
+ console.print(f"[cyan]Duration:[/cyan] {checkpoint.duration_ms:.2f}ms")
2094
+
2095
+ if checkpoint.source_location:
2096
+ console.print("\n[bold]Source Location:[/bold]")
2097
+ console.print(f" [cyan]File:[/cyan] {checkpoint.source_location.file}")
2098
+ console.print(f" [cyan]Line:[/cyan] {checkpoint.source_location.line}")
2099
+ if checkpoint.source_location.function:
2100
+ console.print(f" [cyan]Function:[/cyan] {checkpoint.source_location.function}")
2101
+
2102
+ if checkpoint.source_location.code_context:
2103
+ console.print("\n[bold]Code Context:[/bold]")
2104
+ syntax = Syntax(
2105
+ checkpoint.source_location.code_context,
2106
+ "lua",
2107
+ theme="monokai",
2108
+ line_numbers=True,
2109
+ start_line=checkpoint.source_location.line - 3,
2110
+ )
2111
+ console.print(syntax)
2112
+
2113
+ if checkpoint.captured_vars:
2114
+ console.print("\n[bold]Captured State:[/bold]")
2115
+ console.print(JSON(str(checkpoint.captured_vars)))
2116
+
2117
+ console.print("\n[bold]Result:[/bold]")
2118
+ console.print(JSON(str(checkpoint.result)))
2119
+
2120
+ else:
2121
+ # Show full trace summary
2122
+ console.print(Panel(f"[bold]Execution Trace: {run_id}[/bold]", style="blue"))
2123
+ console.print(f"[cyan]Procedure:[/cyan] {run.procedure_name}")
2124
+ console.print(f"[cyan]File:[/cyan] {run.file_path}")
2125
+ console.print(f"[cyan]Status:[/cyan] {run.status}")
2126
+ console.print(f"[cyan]Started:[/cyan] {run.start_time}")
2127
+
2128
+ if run.end_time:
2129
+ duration = run.end_time - run.start_time
2130
+ console.print(f"[cyan]Ended:[/cyan] {run.end_time}")
2131
+ console.print(f"[cyan]Duration:[/cyan] {duration.total_seconds():.2f}s")
2132
+
2133
+ console.print(f"\n[bold]Checkpoints ({len(run.execution_log)}):[/bold]")
2134
+
2135
+ # Show checkpoint table
2136
+ table = Table()
2137
+ table.add_column("Pos", justify="right", style="cyan")
2138
+ table.add_column("Type", style="green")
2139
+ table.add_column("Duration", justify="right")
2140
+ table.add_column("Source", style="blue")
2141
+
2142
+ for cp in run.execution_log:
2143
+ duration_str = f"{cp.duration_ms:.1f}ms" if cp.duration_ms else "-"
2144
+
2145
+ source_str = ""
2146
+ if cp.source_location:
2147
+ source_str = f"{Path(cp.source_location.file).name}:{cp.source_location.line}"
2148
+
2149
+ table.add_row(
2150
+ str(cp.position),
2151
+ cp.type,
2152
+ duration_str,
2153
+ source_str,
2154
+ )
2155
+
2156
+ console.print(table)
2157
+
2158
+ # Show statistics
2159
+ stats = trace_mgr.get_statistics(run_id)
2160
+ console.print("\n[bold]Statistics:[/bold]")
2161
+ console.print(f" Total duration: {stats['total_duration_ms']:.2f}ms")
2162
+ console.print(f" Checkpoints with source locations: {stats['has_source_locations']}")
2163
+ console.print(" Checkpoints by type:")
2164
+ for cp_type, count in stats["checkpoints_by_type"].items():
2165
+ console.print(f" {cp_type}: {count}")
2166
+
2167
+ except FileNotFoundError:
2168
+ console.print(f"[red]Run {run_id} not found[/red]")
2169
+ raise typer.Exit(1)
2170
+ except IndexError:
2171
+ console.print(f"[red]Checkpoint position {position} out of range[/red]")
2172
+ raise typer.Exit(1)
2173
+ except Exception as e:
2174
+ console.print(f"[red]Error showing trace: {e}[/red]")
2175
+ raise typer.Exit(1)
2176
+
2177
+
2178
+ @app.command(name="trace-export")
2179
+ def trace_export(
2180
+ run_id: str = typer.Argument(..., help="Run ID to export"),
2181
+ output: Path = typer.Argument(..., help="Output file path"),
2182
+ format: str = typer.Option("json", help="Export format (json)"),
2183
+ storage_path: Optional[Path] = typer.Option(None, help="Path for file storage"),
2184
+ ):
2185
+ """Export trace to file."""
2186
+ from tactus.tracing import TraceManager
2187
+
2188
+ # Initialize storage
2189
+ if storage_path:
2190
+ storage = FileStorage(str(storage_path))
2191
+ else:
2192
+ storage = FileStorage()
2193
+
2194
+ trace_mgr = TraceManager(storage)
2195
+
2196
+ try:
2197
+ data = trace_mgr.export_trace(run_id, format)
2198
+
2199
+ output.write_text(data)
2200
+
2201
+ console.print(f"[green]Exported trace to {output}[/green]")
2202
+
2203
+ except FileNotFoundError:
2204
+ console.print(f"[red]Run {run_id} not found[/red]")
2205
+ raise typer.Exit(1)
2206
+ except ValueError as e:
2207
+ console.print(f"[red]Error: {e}[/red]")
2208
+ raise typer.Exit(1)
2209
+ except Exception as e:
2210
+ console.print(f"[red]Error exporting trace: {e}[/red]")
2211
+ raise typer.Exit(1)
2212
+
2213
+
2214
+def main():
+    """Main entry point for the CLI."""
+    # Load configuration before processing any commands
+    load_tactus_config()
+
+    # Check if the user provided a direct file path (shortcut for the 'run' command).
+    # This allows: tactus procedure.tac instead of tactus run procedure.tac
+    if len(sys.argv) > 1:
+        first_arg = sys.argv[1]
+        # Check if it's a file (not a subcommand or option)
+        if not first_arg.startswith("-") and first_arg not in [
+            "run",
+            "validate",
+            "test",
+            "eval",
+            "version",
+            "ide",
+            "trace-list",
+            "trace-show",
+            "trace-export",
+        ]:
+            # Check if it's a file that exists
+            potential_file = Path(first_arg)
+            if potential_file.exists() and potential_file.is_file():
+                # Insert 'run' command before the file path
+                sys.argv.insert(1, "run")
+
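+    # For example, `tactus hello.tac --verbose` is rewritten to
+    # `tactus run hello.tac --verbose` before Typer dispatches; a path that
+    # does not exist simply falls through to normal subcommand parsing.
+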
+    app()
+
+
+if __name__ == "__main__":
+    main()