tactus-0.31.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. tactus/__init__.py +49 -0
  2. tactus/adapters/__init__.py +9 -0
  3. tactus/adapters/broker_log.py +76 -0
  4. tactus/adapters/cli_hitl.py +189 -0
  5. tactus/adapters/cli_log.py +223 -0
  6. tactus/adapters/cost_collector_log.py +56 -0
  7. tactus/adapters/file_storage.py +367 -0
  8. tactus/adapters/http_callback_log.py +109 -0
  9. tactus/adapters/ide_log.py +71 -0
  10. tactus/adapters/lua_tools.py +336 -0
  11. tactus/adapters/mcp.py +289 -0
  12. tactus/adapters/mcp_manager.py +196 -0
  13. tactus/adapters/memory.py +53 -0
  14. tactus/adapters/plugins.py +419 -0
  15. tactus/backends/http_backend.py +58 -0
  16. tactus/backends/model_backend.py +35 -0
  17. tactus/backends/pytorch_backend.py +110 -0
  18. tactus/broker/__init__.py +12 -0
  19. tactus/broker/client.py +247 -0
  20. tactus/broker/protocol.py +183 -0
  21. tactus/broker/server.py +1123 -0
  22. tactus/broker/stdio.py +12 -0
  23. tactus/cli/__init__.py +7 -0
  24. tactus/cli/app.py +2245 -0
  25. tactus/cli/commands/__init__.py +0 -0
  26. tactus/core/__init__.py +32 -0
  27. tactus/core/config_manager.py +790 -0
  28. tactus/core/dependencies/__init__.py +14 -0
  29. tactus/core/dependencies/registry.py +180 -0
  30. tactus/core/dsl_stubs.py +2117 -0
  31. tactus/core/exceptions.py +66 -0
  32. tactus/core/execution_context.py +480 -0
  33. tactus/core/lua_sandbox.py +508 -0
  34. tactus/core/message_history_manager.py +236 -0
  35. tactus/core/mocking.py +286 -0
  36. tactus/core/output_validator.py +291 -0
  37. tactus/core/registry.py +499 -0
  38. tactus/core/runtime.py +2907 -0
  39. tactus/core/template_resolver.py +142 -0
  40. tactus/core/yaml_parser.py +301 -0
  41. tactus/docker/Dockerfile +61 -0
  42. tactus/docker/entrypoint.sh +69 -0
  43. tactus/dspy/__init__.py +39 -0
  44. tactus/dspy/agent.py +1144 -0
  45. tactus/dspy/broker_lm.py +181 -0
  46. tactus/dspy/config.py +212 -0
  47. tactus/dspy/history.py +196 -0
  48. tactus/dspy/module.py +405 -0
  49. tactus/dspy/prediction.py +318 -0
  50. tactus/dspy/signature.py +185 -0
  51. tactus/formatting/__init__.py +7 -0
  52. tactus/formatting/formatter.py +437 -0
  53. tactus/ide/__init__.py +9 -0
  54. tactus/ide/coding_assistant.py +343 -0
  55. tactus/ide/server.py +2223 -0
  56. tactus/primitives/__init__.py +49 -0
  57. tactus/primitives/control.py +168 -0
  58. tactus/primitives/file.py +229 -0
  59. tactus/primitives/handles.py +378 -0
  60. tactus/primitives/host.py +94 -0
  61. tactus/primitives/human.py +342 -0
  62. tactus/primitives/json.py +189 -0
  63. tactus/primitives/log.py +187 -0
  64. tactus/primitives/message_history.py +157 -0
  65. tactus/primitives/model.py +163 -0
  66. tactus/primitives/procedure.py +564 -0
  67. tactus/primitives/procedure_callable.py +318 -0
  68. tactus/primitives/retry.py +155 -0
  69. tactus/primitives/session.py +152 -0
  70. tactus/primitives/state.py +182 -0
  71. tactus/primitives/step.py +209 -0
  72. tactus/primitives/system.py +93 -0
  73. tactus/primitives/tool.py +375 -0
  74. tactus/primitives/tool_handle.py +279 -0
  75. tactus/primitives/toolset.py +229 -0
  76. tactus/protocols/__init__.py +38 -0
  77. tactus/protocols/chat_recorder.py +81 -0
  78. tactus/protocols/config.py +97 -0
  79. tactus/protocols/cost.py +31 -0
  80. tactus/protocols/hitl.py +71 -0
  81. tactus/protocols/log_handler.py +27 -0
  82. tactus/protocols/models.py +355 -0
  83. tactus/protocols/result.py +33 -0
  84. tactus/protocols/storage.py +90 -0
  85. tactus/providers/__init__.py +13 -0
  86. tactus/providers/base.py +92 -0
  87. tactus/providers/bedrock.py +117 -0
  88. tactus/providers/google.py +105 -0
  89. tactus/providers/openai.py +98 -0
  90. tactus/sandbox/__init__.py +63 -0
  91. tactus/sandbox/config.py +171 -0
  92. tactus/sandbox/container_runner.py +1099 -0
  93. tactus/sandbox/docker_manager.py +433 -0
  94. tactus/sandbox/entrypoint.py +227 -0
  95. tactus/sandbox/protocol.py +213 -0
  96. tactus/stdlib/__init__.py +10 -0
  97. tactus/stdlib/io/__init__.py +13 -0
  98. tactus/stdlib/io/csv.py +88 -0
  99. tactus/stdlib/io/excel.py +136 -0
  100. tactus/stdlib/io/file.py +90 -0
  101. tactus/stdlib/io/fs.py +154 -0
  102. tactus/stdlib/io/hdf5.py +121 -0
  103. tactus/stdlib/io/json.py +109 -0
  104. tactus/stdlib/io/parquet.py +83 -0
  105. tactus/stdlib/io/tsv.py +88 -0
  106. tactus/stdlib/loader.py +274 -0
  107. tactus/stdlib/tac/tactus/tools/done.tac +33 -0
  108. tactus/stdlib/tac/tactus/tools/log.tac +50 -0
  109. tactus/testing/README.md +273 -0
  110. tactus/testing/__init__.py +61 -0
  111. tactus/testing/behave_integration.py +380 -0
  112. tactus/testing/context.py +486 -0
  113. tactus/testing/eval_models.py +114 -0
  114. tactus/testing/evaluation_runner.py +222 -0
  115. tactus/testing/evaluators.py +634 -0
  116. tactus/testing/events.py +94 -0
  117. tactus/testing/gherkin_parser.py +134 -0
  118. tactus/testing/mock_agent.py +315 -0
  119. tactus/testing/mock_dependencies.py +234 -0
  120. tactus/testing/mock_hitl.py +171 -0
  121. tactus/testing/mock_registry.py +168 -0
  122. tactus/testing/mock_tools.py +133 -0
  123. tactus/testing/models.py +115 -0
  124. tactus/testing/pydantic_eval_runner.py +508 -0
  125. tactus/testing/steps/__init__.py +13 -0
  126. tactus/testing/steps/builtin.py +902 -0
  127. tactus/testing/steps/custom.py +69 -0
  128. tactus/testing/steps/registry.py +68 -0
  129. tactus/testing/test_runner.py +489 -0
  130. tactus/tracing/__init__.py +5 -0
  131. tactus/tracing/trace_manager.py +417 -0
  132. tactus/utils/__init__.py +1 -0
  133. tactus/utils/cost_calculator.py +72 -0
  134. tactus/utils/model_pricing.py +132 -0
  135. tactus/utils/safe_file_library.py +502 -0
  136. tactus/utils/safe_libraries.py +234 -0
  137. tactus/validation/LuaLexerBase.py +66 -0
  138. tactus/validation/LuaParserBase.py +23 -0
  139. tactus/validation/README.md +224 -0
  140. tactus/validation/__init__.py +7 -0
  141. tactus/validation/error_listener.py +21 -0
  142. tactus/validation/generated/LuaLexer.interp +231 -0
  143. tactus/validation/generated/LuaLexer.py +5548 -0
  144. tactus/validation/generated/LuaLexer.tokens +124 -0
  145. tactus/validation/generated/LuaLexerBase.py +66 -0
  146. tactus/validation/generated/LuaParser.interp +173 -0
  147. tactus/validation/generated/LuaParser.py +6439 -0
  148. tactus/validation/generated/LuaParser.tokens +124 -0
  149. tactus/validation/generated/LuaParserBase.py +23 -0
  150. tactus/validation/generated/LuaParserVisitor.py +118 -0
  151. tactus/validation/generated/__init__.py +7 -0
  152. tactus/validation/grammar/LuaLexer.g4 +123 -0
  153. tactus/validation/grammar/LuaParser.g4 +178 -0
  154. tactus/validation/semantic_visitor.py +817 -0
  155. tactus/validation/validator.py +157 -0
  156. tactus-0.31.0.dist-info/METADATA +1809 -0
  157. tactus-0.31.0.dist-info/RECORD +160 -0
  158. tactus-0.31.0.dist-info/WHEEL +4 -0
  159. tactus-0.31.0.dist-info/entry_points.txt +2 -0
  160. tactus-0.31.0.dist-info/licenses/LICENSE +21 -0
tactus/ide/server.py ADDED
@@ -0,0 +1,2223 @@
+ """
+ Tactus IDE Backend Server.
+
+ Provides an HTTP-based LSP server for the Tactus IDE.
+ """
+
+ import json
+ import logging
+ import os
+ import queue
+ import subprocess
+ import threading
+ import time
+ from datetime import datetime
+ from pathlib import Path
+ from flask import Flask, request, jsonify, Response, stream_with_context
+ from flask_cors import CORS
+ from typing import Dict, Any, List, Optional
+
+ from tactus.validation.validator import TactusValidator, ValidationMode
+ from tactus.core.registry import ValidationMessage
+
+ logger = logging.getLogger(__name__)
+
+ # Workspace state
+ WORKSPACE_ROOT = None
+
+
+ class TactusLSPHandler:
+     """LSP handler for Tactus DSL."""
+
+     def __init__(self):
+         self.validator = TactusValidator()
+         self.documents: Dict[str, str] = {}
+         self.registries: Dict[str, Any] = {}
+
+     def validate_document(self, uri: str, text: str) -> List[Dict[str, Any]]:
+         """Validate document and return LSP diagnostics."""
+         self.documents[uri] = text
+
+         try:
+             result = self.validator.validate(text, ValidationMode.FULL)
+
+             if result.registry:
+                 self.registries[uri] = result.registry
+
+             diagnostics = []
+             for error in result.errors:
+                 diagnostic = self._convert_to_diagnostic(error, "Error")
+                 if diagnostic:
+                     diagnostics.append(diagnostic)
+
+             for warning in result.warnings:
+                 diagnostic = self._convert_to_diagnostic(warning, "Warning")
+                 if diagnostic:
+                     diagnostics.append(diagnostic)
+
+             return diagnostics
+         except Exception as e:
+             logger.error(f"Error validating document {uri}: {e}", exc_info=True)
+             return []
+
+     def _convert_to_diagnostic(
+         self, message: ValidationMessage, severity_str: str
+     ) -> Optional[Dict[str, Any]]:
+         """Convert ValidationMessage to LSP diagnostic."""
+         severity = 1 if severity_str == "Error" else 2
+
+         line = message.location[0] - 1 if message.location else 0
+         col = message.location[1] - 1 if message.location and len(message.location) > 1 else 0
+
+         return {
+             "range": {
+                 "start": {"line": line, "character": col},
+                 "end": {"line": line, "character": col + 10},
+             },
+             "severity": severity,
+             "source": "tactus",
+             "message": message.message,
+         }
+
+     def close_document(self, uri: str):
+         """Close a document."""
+         self.documents.pop(uri, None)
+         self.registries.pop(uri, None)
+
+
+ class LSPServer:
+     """Language Server Protocol server for Tactus DSL."""
+
+     def __init__(self):
+         self.handler = TactusLSPHandler()
+         self.initialized = False
+         self.client_capabilities = {}
+
+     def handle_message(self, message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+         """Handle LSP JSON-RPC message."""
+         method = message.get("method")
+         params = message.get("params", {})
+         msg_id = message.get("id")
+
+         try:
+             if method == "initialize":
+                 result = self._handle_initialize(params)
+             else:
+                 logger.warning(f"Unhandled LSP method: {method}")
+                 return self._error_response(msg_id, -32601, f"Method not found: {method}")
+
+             if msg_id is not None:
+                 return {"jsonrpc": "2.0", "id": msg_id, "result": result}
+         except Exception as e:
+             logger.error(f"Error handling {method}: {e}", exc_info=True)
+             return self._error_response(msg_id, -32603, str(e))
+
+     def _handle_initialize(self, params: Dict[str, Any]) -> Dict[str, Any]:
+         """Handle initialize request."""
+         self.client_capabilities = params.get("capabilities", {})
+         self.initialized = True
+
+         return {
+             "capabilities": {
+                 "textDocumentSync": {"openClose": True, "change": 2, "save": {"includeText": True}},
+                 "diagnosticProvider": {
+                     "interFileDependencies": False,
+                     "workspaceDiagnostics": False,
+                 },
+             },
+             "serverInfo": {"name": "tactus-lsp-server", "version": "0.1.0"},
+         }
+
+     def _error_response(self, msg_id: Optional[int], code: int, message: str) -> Dict[str, Any]:
+         """Create LSP error response."""
+         return {"jsonrpc": "2.0", "id": msg_id, "error": {"code": code, "message": message}}
+
+
+ def _resolve_workspace_path(relative_path: str) -> Path:
+     """
+     Resolve a relative path within the workspace root.
+     Raises ValueError if path escapes workspace or workspace not set.
+     """
+     global WORKSPACE_ROOT
+
+     if not WORKSPACE_ROOT:
+         raise ValueError("No workspace folder selected")
+
+     # Normalize the relative path
+     workspace = Path(WORKSPACE_ROOT).resolve()
+     target = (workspace / relative_path).resolve()
+
+     # Ensure target is within workspace (prevent path traversal)
+     try:
+         target.relative_to(workspace)
+     except ValueError:
+         raise ValueError(f"Path '{relative_path}' escapes workspace")
+
+     return target
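+
+ # Behavior sketch for the helper above (paths are illustrative):
+ #
+ #     WORKSPACE_ROOT = "/home/user/project"
+ #     _resolve_workspace_path("procs/demo.tac")  # -> /home/user/project/procs/demo.tac
+ #     _resolve_workspace_path("../etc/passwd")   # raises ValueError (escapes workspace)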
+
+
+ def create_app(initial_workspace: Optional[str] = None, frontend_dist_dir: Optional[str] = None):
+     """Create and configure the Flask app.
+
+     Args:
+         initial_workspace: Initial workspace directory. If not provided, uses current directory.
+         frontend_dist_dir: Path to frontend dist directory. If provided, serves frontend from Flask.
+     """
+     global WORKSPACE_ROOT
+
+     # Configure Flask to serve frontend static files if provided
+     if frontend_dist_dir:
+         app = Flask(__name__, static_folder=frontend_dist_dir, static_url_path="")
+     else:
+         app = Flask(__name__)
+     CORS(app)
+
+     # Set initial workspace if provided
+     if initial_workspace:
+         WORKSPACE_ROOT = str(Path(initial_workspace).resolve())
+
+     # Initialize LSP server
+     lsp_server = LSPServer()
+
+     @app.route("/health", methods=["GET"])
+     def health():
+         """Health check endpoint."""
+         return jsonify({"status": "ok", "service": "tactus-ide-backend"})
+
+     @app.route("/api/workspace/cwd", methods=["GET"])
+     def get_cwd():
+         """Get current working directory (returns the initial workspace if set)."""
+         if WORKSPACE_ROOT:
+             return jsonify({"cwd": WORKSPACE_ROOT})
+         return jsonify({"cwd": str(Path.cwd())})
+
+     @app.route("/api/about", methods=["GET"])
+     def get_about_info():
+         """Get application version and metadata."""
+         from tactus import __version__
+
+         return jsonify(
+             {
+                 "version": __version__,
+                 "name": "Tactus IDE",
+                 "description": "A Lua-based DSL for agentic workflows",
+                 "author": "Ryan Porter",
+                 "license": "MIT",
+                 "repository": "https://github.com/AnthusAI/Tactus",
+                 "documentation": "https://github.com/AnthusAI/Tactus/tree/main/docs",
+                 "issues": "https://github.com/AnthusAI/Tactus/issues",
+             }
+         )
+
+     @app.route("/api/workspace", methods=["GET", "POST"])
+     def workspace_operations():
+         """Handle workspace operations."""
+         global WORKSPACE_ROOT
+
+         if request.method == "GET":
+             if not WORKSPACE_ROOT:
+                 return jsonify({"root": None, "name": None})
+
+             workspace_path = Path(WORKSPACE_ROOT)
+             return jsonify({"root": str(workspace_path), "name": workspace_path.name})
+
+         elif request.method == "POST":
+             data = request.json
+             root = data.get("root")
+
+             if not root:
+                 return jsonify({"error": "Missing 'root' parameter"}), 400
+
+             try:
+                 root_path = Path(root).resolve()
+
+                 if not root_path.exists():
+                     return jsonify({"error": f"Path does not exist: {root}"}), 404
+
+                 if not root_path.is_dir():
+                     return jsonify({"error": f"Path is not a directory: {root}"}), 400
+
+                 # Set workspace root and change working directory
+                 WORKSPACE_ROOT = str(root_path)
+                 os.chdir(WORKSPACE_ROOT)
+
+                 logger.info(f"Workspace set to: {WORKSPACE_ROOT}")
+
+                 return jsonify({"success": True, "root": WORKSPACE_ROOT, "name": root_path.name})
+             except Exception as e:
+                 logger.error(f"Error setting workspace {root}: {e}")
+                 return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/tree", methods=["GET"])
+     def tree_operations():
+         """List directory contents within the workspace."""
+         global WORKSPACE_ROOT
+
+         if not WORKSPACE_ROOT:
+             return jsonify({"error": "No workspace folder selected"}), 400
+
+         relative_path = request.args.get("path", "")
+
+         try:
+             target_path = _resolve_workspace_path(relative_path)
+
+             if not target_path.exists():
+                 return jsonify({"error": f"Path not found: {relative_path}"}), 404
+
+             if not target_path.is_dir():
+                 return jsonify({"error": f"Path is not a directory: {relative_path}"}), 400
+
+             # List directory contents
+             entries = []
+             for item in sorted(
+                 target_path.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower())
+             ):
+                 entry = {
+                     "name": item.name,
+                     "path": str(item.relative_to(WORKSPACE_ROOT)),
+                     "type": "directory" if item.is_dir() else "file",
+                 }
+
+                 # Add extension for files
+                 if item.is_file():
+                     entry["extension"] = item.suffix
+
+                 entries.append(entry)
+
+             return jsonify({"path": relative_path, "entries": entries})
+         except ValueError as e:
+             return jsonify({"error": str(e)}), 400
+         except Exception as e:
+             logger.error(f"Error listing directory {relative_path}: {e}")
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/file", methods=["GET", "POST"])
+     def file_operations():
+         """Handle file operations (read/write files within workspace)."""
+         if request.method == "GET":
+             file_path = request.args.get("path")
+             if not file_path:
+                 return jsonify({"error": "Missing 'path' parameter"}), 400
+
+             try:
+                 path = _resolve_workspace_path(file_path)
+
+                 if not path.exists():
+                     return jsonify({"error": f"File not found: {file_path}"}), 404
+
+                 if not path.is_file():
+                     return jsonify({"error": f"Path is not a file: {file_path}"}), 400
+
+                 content = path.read_text()
+                 return jsonify(
+                     {
+                         "path": file_path,
+                         "absolutePath": str(path),
+                         "content": content,
+                         "name": path.name,
+                     }
+                 )
+             except ValueError as e:
+                 return jsonify({"error": str(e)}), 400
+             except Exception as e:
+                 logger.error(f"Error reading file {file_path}: {e}")
+                 return jsonify({"error": str(e)}), 500
+
+         elif request.method == "POST":
+             data = request.json
+             file_path = data.get("path")
+             content = data.get("content")
+
+             if not file_path or content is None:
+                 return jsonify({"error": "Missing 'path' or 'content'"}), 400
+
+             try:
+                 path = _resolve_workspace_path(file_path)
+
+                 # Create parent directories if needed
+                 path.parent.mkdir(parents=True, exist_ok=True)
+                 path.write_text(content)
+
+                 return jsonify({"success": True, "path": file_path, "absolutePath": str(path)})
+             except ValueError as e:
+                 return jsonify({"error": str(e)}), 400
+             except Exception as e:
+                 logger.error(f"Error writing file {file_path}: {e}")
+                 return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/procedure/metadata", methods=["GET"])
+     def get_procedure_metadata():
+         """
+         Get metadata about a procedure file using TactusValidator.
+
+         Query params:
+         - path: workspace-relative path to procedure file (required)
+
+         Returns:
+             {
+                 "success": true,
+                 "metadata": {
+                     "description": str | null,
+                     "input": { name: ParameterDeclaration },
+                     "output": { name: OutputFieldDeclaration },
+                     "agents": { name: AgentDeclaration },
+                     "toolsets": { name: dict },
+                     "tools": [str]  # Flattened list of all tools
+                 }
+             }
+         """
+         file_path = request.args.get("path")
+
+         if not file_path:
+             return jsonify({"error": "Missing 'path' parameter"}), 400
+
+         try:
+             # Resolve path
+             path = _resolve_workspace_path(file_path)
+
+             if not path.exists():
+                 return jsonify({"error": f"File not found: {file_path}"}), 404
+
+             # Validate with FULL mode to get registry
+             validator = TactusValidator()
+             result = validator.validate_file(str(path), ValidationMode.FULL)
+
+             if not result.registry:
+                 # Validation failed or no registry
+                 return (
+                     jsonify(
+                         {
+                             "success": False,
+                             "error": "Failed to extract metadata",
+                             "validation_errors": [
+                                 {
+                                     "message": e.message,
+                                     "line": e.location[0] if e.location else None,
+                                 }
+                                 for e in result.errors
+                             ],
+                         }
+                     ),
+                     400,
+                 )
+
+             registry = result.registry
+
+             # Extract tools from agents, toolsets, and lua_tools
+             all_tools = set()
+             for agent in registry.agents.values():
+                 all_tools.update(agent.tools)
+             for toolset in registry.toolsets.values():
+                 if isinstance(toolset, dict) and "tools" in toolset:
+                     all_tools.update(toolset["tools"])
+             # Include Lua-defined tools (from tool() function calls)
+             if hasattr(registry, "lua_tools") and registry.lua_tools:
+                 all_tools.update(registry.lua_tools.keys())
+
+             # Parse specifications if present
+             specifications_data = None
+             if registry.gherkin_specifications:
+                 import re
+
+                 gherkin_text = registry.gherkin_specifications
+
+                 # Count scenarios
+                 scenarios = re.findall(r"^\s*Scenario:", gherkin_text, re.MULTILINE)
+
+                 # Extract feature name
+                 feature_match = re.search(r"Feature:\s*(.+)", gherkin_text)
+                 feature_name = feature_match.group(1).strip() if feature_match else None
+
+                 specifications_data = {
+                     "text": gherkin_text,
+                     "feature_name": feature_name,
+                     "scenario_count": len(scenarios),
+                 }
+
+             # Extract evaluations summary
+             evaluations_data = None
+             if registry.pydantic_evaluations:
+                 evals = registry.pydantic_evaluations
+                 dataset_count = 0
+                 evaluator_count = 0
+
+                 if isinstance(evals, dict):
+                     # Count dataset items
+                     if "dataset" in evals and isinstance(evals["dataset"], list):
+                         dataset_count = len(evals["dataset"])
+
+                     # Count evaluators
+                     if "evaluators" in evals and isinstance(evals["evaluators"], list):
+                         evaluator_count = len(evals["evaluators"])
+
+                 evaluations_data = {
+                     "dataset_count": dataset_count,
+                     "evaluator_count": evaluator_count,
+                     "runs": evals.get("runs", 1) if isinstance(evals, dict) else 1,
+                     "parallel": evals.get("parallel", False) if isinstance(evals, dict) else False,
+                 }
+
+             # Build metadata response
+             metadata = {
+                 "description": registry.description,
+                 "input": registry.input_schema if registry.input_schema else {},
+                 "output": registry.output_schema if registry.output_schema else {},
+                 "agents": {
+                     name: {
+                         "name": agent.name,
+                         "provider": agent.provider,
+                         "model": agent.model if isinstance(agent.model, str) else str(agent.model),
+                         "system_prompt": (
+                             agent.system_prompt
+                             if isinstance(agent.system_prompt, str)
+                             else "[Dynamic Prompt]"
+                         ),
+                         "tools": agent.tools,
+                     }
+                     for name, agent in registry.agents.items()
+                 },
+                 "toolsets": {name: toolset for name, toolset in registry.toolsets.items()},
+                 "tools": sorted(list(all_tools)),
+                 "specifications": specifications_data,
+                 "evaluations": evaluations_data,
+             }
+
+             return jsonify({"success": True, "metadata": metadata})
+
+         except Exception as e:
+             logger.error(f"Error extracting procedure metadata: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/validate", methods=["POST"])
+     def validate_procedure():
+         """Validate Tactus procedure code."""
+         data = request.json
+         content = data.get("content")
+
+         if content is None:
+             return jsonify({"error": "Missing 'content' parameter"}), 400
+
+         try:
+             validator = TactusValidator()
+             result = validator.validate(content)
+
+             return jsonify(
+                 {
+                     "valid": result.valid,
+                     "errors": [
+                         {
+                             "message": err.message,
+                             "line": err.location[0] if err.location else None,
+                             "column": err.location[1] if err.location else None,
+                             "level": err.level,
+                         }
+                         for err in result.errors
+                     ],
+                     "warnings": [
+                         {
+                             "message": warn.message,
+                             "line": warn.location[0] if warn.location else None,
+                             "column": warn.location[1] if warn.location else None,
+                             "level": warn.level,
+                         }
+                         for warn in result.warnings
+                     ],
+                 }
+             )
+         except Exception as e:
+             logger.error(f"Error validating code: {e}")
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/validate/stream", methods=["GET"])
+     def validate_stream():
+         """Validate Tactus code with SSE streaming output."""
+         file_path = request.args.get("path")
+
+         if not file_path:
+             return jsonify({"error": "Missing 'path' parameter"}), 400
+
+         try:
+             # Resolve path within workspace
+             path = _resolve_workspace_path(file_path)
+
+             # Ensure file exists
+             if not path.exists():
+                 return jsonify({"error": f"File not found: {file_path}"}), 404
+
+             def generate_events():
+                 """Generator function that yields SSE validation events."""
+                 try:
+                     import json
+                     from datetime import datetime
+
+                     # Read and validate file
+                     content = path.read_text()
+                     validator = TactusValidator()
+                     result = validator.validate(content)
+
+                     # Emit validation event
+                     validation_event = {
+                         "event_type": "validation",
+                         "valid": result.valid,
+                         "errors": [
+                             {
+                                 "message": err.message,
+                                 "line": err.location[0] if err.location else None,
+                                 "column": err.location[1] if err.location else None,
+                                 "level": err.level,
+                             }
+                             for err in result.errors
+                         ],
+                         "warnings": [
+                             {
+                                 "message": warn.message,
+                                 "line": warn.location[0] if warn.location else None,
+                                 "column": warn.location[1] if warn.location else None,
+                                 "level": warn.level,
+                             }
+                             for warn in result.warnings
+                         ],
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                     }
+                     yield f"data: {json.dumps(validation_event)}\n\n"
+
+                 except Exception as e:
+                     logger.error(f"Error in validation: {e}", exc_info=True)
+                     error_event = {
+                         "event_type": "execution",
+                         "lifecycle_stage": "error",
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                         "details": {"error": str(e)},
+                     }
+                     yield f"data: {json.dumps(error_event)}\n\n"
+
+             return Response(
+                 stream_with_context(generate_events()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "X-Accel-Buffering": "no",
+                     "Connection": "keep-alive",
+                 },
+             )
+
+         except ValueError as e:
+             return jsonify({"error": str(e)}), 400
+         except Exception as e:
+             logger.error(f"Error setting up validation: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/run", methods=["POST"])
+     def run_procedure():
+         """Run a Tactus procedure."""
+         data = request.json
+         file_path = data.get("path")
+         content = data.get("content")
+
+         if not file_path:
+             return jsonify({"error": "Missing 'path' parameter"}), 400
+
+         try:
+             # Resolve path within workspace
+             path = _resolve_workspace_path(file_path)
+
+             # Save content if provided
+             if content is not None:
+                 path.parent.mkdir(parents=True, exist_ok=True)
+                 path.write_text(content)
+
+             # Ensure file exists
+             if not path.exists():
+                 return jsonify({"error": f"File not found: {file_path}"}), 404
+
+             # Run the procedure using tactus CLI
+             result = subprocess.run(
+                 ["tactus", "run", str(path)],
+                 capture_output=True,
+                 text=True,
+                 timeout=30,
+                 cwd=WORKSPACE_ROOT,
+             )
+
+             return jsonify(
+                 {
+                     "success": result.returncode == 0,
+                     "exitCode": result.returncode,
+                     "stdout": result.stdout,
+                     "stderr": result.stderr,
+                 }
+             )
+         except subprocess.TimeoutExpired:
+             return jsonify({"error": "Procedure execution timed out (30s)"}), 408
+         except ValueError as e:
+             return jsonify({"error": str(e)}), 400
+         except Exception as e:
+             logger.error(f"Error running procedure {file_path}: {e}")
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/run/stream", methods=["GET", "POST"])
+     def run_procedure_stream():
+         """
+         Run a Tactus procedure with SSE streaming output.
+
+         For GET:
+         - path: workspace-relative path to procedure file (required, query param)
+         - inputs: JSON-encoded input parameters (optional, query param)
+
+         For POST:
+         - path: workspace-relative path to procedure file (required, JSON body)
+         - content: optional file content to save before running (JSON body)
+         - inputs: input parameters as object (optional, JSON body)
+         """
+         if request.method == "POST":
+             data = request.json or {}
+             file_path = data.get("path")
+             content = data.get("content")
+             inputs = data.get("inputs", {})
+         else:
+             file_path = request.args.get("path")
+             content = None
+             inputs_json = request.args.get("inputs", "{}")
+             # Parse inputs JSON for GET
+             try:
+                 inputs = json.loads(inputs_json) if inputs_json else {}
+             except json.JSONDecodeError as e:
+                 return jsonify({"error": f"Invalid 'inputs' JSON: {e}"}), 400
+
+         if not file_path:
+             return jsonify({"error": "Missing 'path' parameter"}), 400
+
+         try:
+             # Resolve path within workspace
+             path = _resolve_workspace_path(file_path)
+
+             # Save content if provided (POST requests can include file content)
+             if content is not None:
+                 path.parent.mkdir(parents=True, exist_ok=True)
+                 path.write_text(content)
+
+             # Ensure file exists
+             if not path.exists():
+                 return jsonify({"error": f"File not found: {file_path}"}), 404
+
+             procedure_id = f"ide-{path.stem}"
+
+             def generate_events():
+                 """Generator function that yields SSE events."""
+                 log_handler = None
+                 all_events = []  # Collect all events to save at the end
+                 try:
+                     # Send start event
+                     import json
+                     from datetime import datetime
+                     from tactus.adapters.ide_log import IDELogHandler
+                     from tactus.core.runtime import TactusRuntime
+                     from tactus.adapters.file_storage import FileStorage
+                     from nanoid import generate
+
+                     # Generate unique run_id for this execution
+                     run_id = generate(size=21)
+
+                     start_event = {
+                         "event_type": "execution",
+                         "lifecycle_stage": "start",
+                         "procedure_id": procedure_id,
+                         "run_id": run_id,
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                         "details": {"path": file_path},
+                         "inputs": inputs,  # Include inputs in start event
+                     }
+                     all_events.append(start_event)
+                     yield f"data: {json.dumps(start_event)}\n\n"
+
+                     # Create IDE log handler to collect structured events
+                     log_handler = IDELogHandler()
+
+                     # Create storage backend
+                     from pathlib import Path as PathLib
+
+                     storage_dir = (
+                         str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                         if WORKSPACE_ROOT
+                         else "~/.tactus/storage"
+                     )
+                     storage_backend = FileStorage(storage_dir=storage_dir)
+
+                     # Create runtime with log handler and run_id
+                     runtime = TactusRuntime(
+                         procedure_id=procedure_id,
+                         storage_backend=storage_backend,
+                         hitl_handler=None,  # No HITL in IDE streaming mode
+                         log_handler=log_handler,
+                         run_id=run_id,
+                         source_file_path=str(path),
+                     )
+
+                     # Read procedure source
+                     source = path.read_text()
+
+                     # Check Docker availability for sandbox execution
+                     from tactus.sandbox import is_docker_available, SandboxConfig, ContainerRunner
+
+                     docker_available, docker_reason = is_docker_available()
+                     # Enable dev_mode by default in IDE for live code mounting
+                     sandbox_config = SandboxConfig(dev_mode=True)
+                     use_sandbox = docker_available and not sandbox_config.is_explicitly_disabled()
+
+                     if use_sandbox:
+                         logger.info("[SANDBOX] Docker available, using container execution")
+                     else:
+                         logger.info(
+                             f"[SANDBOX] Direct execution (Docker: {docker_available}, reason: {docker_reason})"
+                         )
+
+                     # Create event queue for sandbox event streaming (if using sandbox)
+                     sandbox_event_queue = None
+                     if use_sandbox:
+                         sandbox_event_queue = queue.Queue()
+
+                         # Emit container starting event
+                         container_starting_event = {
+                             "event_type": "container_status",
+                             "status": "starting",
+                             "execution_id": run_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                         }
+                         all_events.append(container_starting_event)
+                         yield f"data: {json.dumps(container_starting_event)}\n\n"
+
+                     # Run in a thread to avoid blocking
+                     import asyncio
+
+                     result_container = {
+                         "result": None,
+                         "error": None,
+                         "done": False,
+                         "container_ready": False,
+                     }
+
+                     # Capture inputs in closure scope for the thread
+                     procedure_inputs = inputs
+
+                     def run_procedure():
+                         try:
+                             # Create new event loop for this thread
+                             loop = asyncio.new_event_loop()
+                             asyncio.set_event_loop(loop)
+
+                             if use_sandbox:
+                                 # Use sandbox execution (events streamed via broker over UDS)
+                                 runner = ContainerRunner(sandbox_config)
+                                 exec_result = loop.run_until_complete(
+                                     runner.run(
+                                         source=source,
+                                         params=procedure_inputs,
+                                         source_file_path=str(path),
+                                         format="lua",
+                                         event_handler=(
+                                             sandbox_event_queue.put if sandbox_event_queue else None
+                                         ),
+                                     )
+                                 )
+
+                                 # Mark container as ready after first response
+                                 if not result_container["container_ready"]:
+                                     result_container["container_ready"] = True
+
+                                 # Extract result from ExecutionResult
+                                 if exec_result.status.value == "success":
+                                     result_container["result"] = exec_result.result
+                                 else:
+                                     raise Exception(exec_result.error or "Sandbox execution failed")
+                             else:
+                                 # Direct execution (no sandbox)
+                                 result = loop.run_until_complete(
+                                     runtime.execute(source, context=procedure_inputs, format="lua")
+                                 )
+                                 result_container["result"] = result
+                         except Exception as e:
+                             result_container["error"] = e
+                         finally:
+                             result_container["done"] = True
+                             loop.close()
+
+                     exec_thread = threading.Thread(target=run_procedure)
+                     exec_thread.daemon = True
+                     exec_thread.start()
+
+                     # Emit container running event after starting
+                     if use_sandbox:
+                         container_running_event = {
+                             "event_type": "container_status",
+                             "status": "running",
+                             "execution_id": run_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                         }
+                         all_events.append(container_running_event)
+                         yield f"data: {json.dumps(container_running_event)}\n\n"
+
+                     # Stream events based on execution mode
+                     while not result_container["done"]:
+                         if use_sandbox and sandbox_event_queue:
+                             # Stream from sandbox callback queue
+                             try:
+                                 event_dict = sandbox_event_queue.get(timeout=0.1)
+                                 all_events.append(event_dict)
+                                 yield f"data: {json.dumps(event_dict)}\n\n"
+                             except queue.Empty:
+                                 pass
+                         else:
+                             # Stream from IDELogHandler (direct execution)
+                             events = log_handler.get_events(timeout=0.1)
+                             for event in events:
+                                 try:
+                                     # Serialize with ISO format for datetime
+                                     event_dict = event.model_dump(mode="json")
+                                     # Format timestamp: add 'Z' only if no timezone info present
+                                     iso_string = event.timestamp.isoformat()
+                                     if not (
+                                         iso_string.endswith("Z")
+                                         or "+" in iso_string
+                                         or iso_string.count("-") > 2
+                                     ):
+                                         iso_string += "Z"
+                                     event_dict["timestamp"] = iso_string
+                                     all_events.append(event_dict)
+                                     yield f"data: {json.dumps(event_dict)}\n\n"
+                                 except Exception as e:
+                                     logger.error(f"Error serializing event: {e}", exc_info=True)
+                                     logger.error(f"Event type: {type(event)}, Event: {event}")
+
+                             time.sleep(0.05)
+
+                     # Get any remaining events
+                     if use_sandbox and sandbox_event_queue:
+                         # Drain sandbox event queue
+                         while True:
+                             try:
+                                 event_dict = sandbox_event_queue.get_nowait()
+                                 all_events.append(event_dict)
+                                 yield f"data: {json.dumps(event_dict)}\n\n"
+                             except queue.Empty:
+                                 break
+
+                         # Emit container stopped event
+                         container_stopped_event = {
+                             "event_type": "container_status",
+                             "status": "stopped",
+                             "execution_id": run_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                         }
+                         all_events.append(container_stopped_event)
+                         yield f"data: {json.dumps(container_stopped_event)}\n\n"
+                     else:
+                         # Drain IDELogHandler events (direct execution)
+                         events = log_handler.get_events(timeout=0.1)
+                         for event in events:
+                             try:
+                                 # Serialize with ISO format for datetime
+                                 event_dict = event.model_dump(mode="json")
+                                 # Format timestamp: add 'Z' only if no timezone info present
+                                 iso_string = event.timestamp.isoformat()
+                                 if not (
+                                     iso_string.endswith("Z")
+                                     or "+" in iso_string
+                                     or iso_string.count("-") > 2
+                                 ):
+                                     iso_string += "Z"
+                                 event_dict["timestamp"] = iso_string
+                                 all_events.append(event_dict)
+                                 yield f"data: {json.dumps(event_dict)}\n\n"
+                             except Exception as e:
+                                 logger.error(f"Error serializing event: {e}", exc_info=True)
+                                 logger.error(f"Event type: {type(event)}, Event: {event}")
+
+                     # Wait for thread to finish
+                     exec_thread.join(timeout=1)
+
+                     # Send completion event
+                     if result_container["error"]:
+                         complete_event = {
+                             "event_type": "execution",
+                             "lifecycle_stage": "error",
+                             "procedure_id": procedure_id,
+                             "exit_code": 1,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                             "details": {"success": False, "error": str(result_container["error"])},
+                         }
+                     else:
+                         complete_event = {
+                             "event_type": "execution",
+                             "lifecycle_stage": "complete",
+                             "procedure_id": procedure_id,
+                             "exit_code": 0,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                             "details": {"success": True},
+                         }
+                     all_events.append(complete_event)
+                     yield f"data: {json.dumps(complete_event)}\n\n"
+
+                     # Consolidate streaming chunks before saving to disk
+                     # Keep only the final accumulated text for each agent
+                     consolidated_events = []
+                     stream_chunks_by_agent = {}
+
+                     for event in all_events:
+                         if event.get("event_type") == "agent_stream_chunk":
+                             # Track by agent name, keeping only the latest
+                             agent_name = event.get("agent_name")
+                             stream_chunks_by_agent[agent_name] = event
+                         else:
+                             consolidated_events.append(event)
+
+                     # Add the final consolidated chunks
+                     consolidated_events.extend(stream_chunks_by_agent.values())
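+                     # Illustrative effect of the consolidation above: if "writer"
+                     # emitted 50 agent_stream_chunk events, only the last (fully
+                     # accumulated) chunk is kept; all other event types pass through.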
+
+                     # Save consolidated events to disk
+                     try:
+                         from pathlib import Path as PathLib
+
+                         events_dir = PathLib(storage_dir) / "events"
+                         events_dir.mkdir(parents=True, exist_ok=True)
+                         events_file = events_dir / f"{run_id}.json"
+                         with open(events_file, "w") as f:
+                             json.dump(consolidated_events, f, indent=2)
+                     except Exception as e:
+                         logger.error(f"Failed to save events for run {run_id}: {e}", exc_info=True)
+
+                 except Exception as e:
+                     logger.error(f"Error in streaming execution: {e}", exc_info=True)
+                     error_event = {
+                         "event_type": "execution",
+                         "lifecycle_stage": "error",
+                         "procedure_id": procedure_id,
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                         "details": {"error": str(e)},
+                     }
+                     yield f"data: {json.dumps(error_event)}\n\n"
+
+             return Response(
+                 stream_with_context(generate_events()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "X-Accel-Buffering": "no",
+                     "Connection": "keep-alive",
+                 },
+             )
+
+         except ValueError as e:
+             return jsonify({"error": str(e)}), 400
+         except Exception as e:
+             logger.error(f"Error setting up streaming execution: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/test/stream", methods=["GET"])
+     def test_procedure_stream():
+         """
+         Run BDD tests with SSE streaming output.
+
+         Query params:
+         - path: procedure file path (required)
+         - mock: use mock mode (optional, default true)
+         - scenario: specific scenario name (optional)
+         - parallel: run in parallel (optional, default false)
+         """
+         file_path = request.args.get("path")
+
+         if not file_path:
+             return jsonify({"error": "Missing 'path' parameter"}), 400
+
+         # Get options
+         mock = request.args.get("mock", "true").lower() == "true"
+         parallel = request.args.get("parallel", "false").lower() == "true"
+
+         try:
+             # Resolve path within workspace
+             path = _resolve_workspace_path(file_path)
+
+             # Ensure file exists
+             if not path.exists():
+                 return jsonify({"error": f"File not found: {file_path}"}), 404
+
+             procedure_id = path.stem
+
+             def generate_events():
+                 """Generator function that yields SSE test events."""
+                 try:
+                     import json
+                     from datetime import datetime
+                     from tactus.validation import TactusValidator
+                     from tactus.testing import TactusTestRunner, GherkinParser
+
+                     # Validate and extract specifications
+                     validator = TactusValidator()
+                     validation_result = validator.validate_file(str(path))
+
+                     if not validation_result.valid:
+                         # Emit validation error
+                         error_event = {
+                             "event_type": "execution",
+                             "lifecycle_stage": "error",
+                             "procedure_id": procedure_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                             "details": {
+                                 "error": "Validation failed",
+                                 "errors": [
+                                     {"message": e.message, "level": e.level}
+                                     for e in validation_result.errors
+                                 ],
+                             },
+                         }
+                         yield f"data: {json.dumps(error_event)}\n\n"
+                         return
+
+                     if (
+                         not validation_result.registry
+                         or not validation_result.registry.gherkin_specifications
+                     ):
+                         # No specifications found
+                         error_event = {
+                             "event_type": "execution",
+                             "lifecycle_stage": "error",
+                             "procedure_id": procedure_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                             "details": {"error": "No specifications found in procedure"},
+                         }
+                         yield f"data: {json.dumps(error_event)}\n\n"
+                         return
+
+                     # Clear Behave's global step registry before each test run
+                     # This prevents conflicts when running multiple tests in the same Flask process
+                     try:
+                         from behave import step_registry
+
+                         # Clear all registered steps (each step_type maps to a list)
+                         step_registry.registry.steps = {
+                             "given": [],
+                             "when": [],
+                             "then": [],
+                             "step": [],
+                         }
+                         # Recreate the decorators
+                         from behave.step_registry import setup_step_decorators
+
+                         setup_step_decorators()
+                     except Exception as e:
+                         logger.warning(f"Could not reset Behave step registry: {e}")
+
+                     # Setup test runner with mocks from registry
+                     mock_tools = None
+                     if mock:
+                         # Start with default done mock
+                         mock_tools = {"done": {"status": "ok"}}
+                         # Add tool mocks from Mocks {} block in .tac file
+                         if validation_result.registry.mocks:
+                             for tool_name, mock_config in validation_result.registry.mocks.items():
+                                 # Extract output/response from mock config
+                                 if isinstance(mock_config, dict) and "output" in mock_config:
+                                     mock_tools[tool_name] = mock_config["output"]
+                                 else:
+                                     mock_tools[tool_name] = mock_config
+                     runner = TactusTestRunner(path, mock_tools=mock_tools, mocked=mock)
+                     runner.setup(validation_result.registry.gherkin_specifications)
+
+                     # Get parsed feature to count scenarios
+                     parser = GherkinParser()
+                     parsed_feature = parser.parse(validation_result.registry.gherkin_specifications)
+                     total_scenarios = len(parsed_feature.scenarios)
+
+                     # Emit started event
+                     start_event = {
+                         "event_type": "test_started",
+                         "procedure_file": str(path),
+                         "total_scenarios": total_scenarios,
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                     }
+                     yield f"data: {json.dumps(start_event)}\n\n"
+
+                     # Run tests
+                     test_result = runner.run_tests(parallel=parallel)
+
+                     # Emit scenario completion events
+                     for feature in test_result.features:
+                         for scenario in feature.scenarios:
+                             scenario_event = {
+                                 "event_type": "test_scenario_completed",
+                                 "scenario_name": scenario.name,
+                                 "status": scenario.status,
+                                 "duration": scenario.duration,
+                                 "total_cost": scenario.total_cost,
+                                 "total_tokens": scenario.total_tokens,
+                                 "llm_calls": scenario.llm_calls,
+                                 "iterations": scenario.iterations,
+                                 "tools_used": scenario.tools_used,
+                                 "timestamp": datetime.utcnow().isoformat() + "Z",
+                             }
+                             yield f"data: {json.dumps(scenario_event)}\n\n"
+
+                     # Emit completed event
+                     complete_event = {
+                         "event_type": "test_completed",
+                         "result": {
+                             "total_scenarios": test_result.total_scenarios,
+                             "passed_scenarios": test_result.passed_scenarios,
+                             "failed_scenarios": test_result.failed_scenarios,
+                             "total_cost": test_result.total_cost,
+                             "total_tokens": test_result.total_tokens,
+                             "total_llm_calls": test_result.total_llm_calls,
+                             "total_iterations": test_result.total_iterations,
+                             "unique_tools_used": test_result.unique_tools_used,
+                             "features": [
+                                 {
+                                     "name": f.name,
+                                     "scenarios": [
+                                         {
+                                             "name": s.name,
+                                             "status": s.status,
+                                             "duration": s.duration,
+                                             "steps": [
+                                                 {
+                                                     "keyword": step.keyword,
+                                                     "text": step.message,
+                                                     "status": step.status,
+                                                     "error_message": step.error_message,
+                                                 }
+                                                 for step in s.steps
+                                             ],
+                                         }
+                                         for s in f.scenarios
+                                     ],
+                                 }
+                                 for f in test_result.features
+                             ],
+                         },
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                     }
+                     yield f"data: {json.dumps(complete_event)}\n\n"
+
+                     # Cleanup
+                     runner.cleanup()
+
+                 except Exception as e:
+                     logger.error(f"Error in test execution: {e}", exc_info=True)
+                     error_event = {
+                         "event_type": "execution",
+                         "lifecycle_stage": "error",
+                         "procedure_id": procedure_id,
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                         "details": {"error": str(e)},
+                     }
+                     yield f"data: {json.dumps(error_event)}\n\n"
+
+             return Response(
+                 stream_with_context(generate_events()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "X-Accel-Buffering": "no",
+                     "Connection": "keep-alive",
+                 },
+             )
+
+         except ValueError as e:
+             return jsonify({"error": str(e)}), 400
+         except Exception as e:
+             logger.error(f"Error setting up test execution: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/evaluate/stream", methods=["GET"])
+     def evaluate_procedure_stream():
+         """
+         Run BDD evaluation with SSE streaming output.
+
+         Query params:
+         - path: procedure file path (required)
+         - runs: number of runs per scenario (optional, default 10)
+         - mock: use mock mode (optional, default true)
+         - scenario: specific scenario name (optional)
+         - parallel: run in parallel (optional, default true)
+         """
+         file_path = request.args.get("path")
+
+         if not file_path:
+             return jsonify({"error": "Missing 'path' parameter"}), 400
+
+         # Get options
+         runs = int(request.args.get("runs", "10"))
+         mock = request.args.get("mock", "true").lower() == "true"
+         parallel = request.args.get("parallel", "true").lower() == "true"
+
+         try:
+             # Resolve path within workspace
+             path = _resolve_workspace_path(file_path)
+
+             # Ensure file exists
+             if not path.exists():
+                 return jsonify({"error": f"File not found: {file_path}"}), 404
+
+             procedure_id = path.stem
+
+             def generate_events():
+                 """Generator function that yields SSE evaluation events."""
+                 try:
+                     import json
+                     from datetime import datetime
+                     from tactus.validation import TactusValidator
+                     from tactus.testing import TactusEvaluationRunner, GherkinParser
+
+                     # Validate and extract specifications
+                     validator = TactusValidator()
+                     validation_result = validator.validate_file(str(path))
+
+                     if not validation_result.valid:
+                         error_event = {
+                             "event_type": "execution",
+                             "lifecycle_stage": "error",
+                             "procedure_id": procedure_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                             "details": {
+                                 "error": "Validation failed",
+                                 "errors": [
+                                     {"message": e.message, "level": e.level}
+                                     for e in validation_result.errors
+                                 ],
+                             },
+                         }
+                         yield f"data: {json.dumps(error_event)}\n\n"
+                         return
+
+                     if (
+                         not validation_result.registry
+                         or not validation_result.registry.gherkin_specifications
+                     ):
+                         error_event = {
+                             "event_type": "execution",
+                             "lifecycle_stage": "error",
+                             "procedure_id": procedure_id,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                             "details": {"error": "No specifications found in procedure"},
+                         }
+                         yield f"data: {json.dumps(error_event)}\n\n"
+                         return
+
+                     # Setup evaluation runner
+                     mock_tools = {"done": {"status": "ok"}} if mock else None
+                     evaluator = TactusEvaluationRunner(path, mock_tools=mock_tools)
+                     evaluator.setup(validation_result.registry.gherkin_specifications)
+
+                     # Get parsed feature to count scenarios
+                     parser = GherkinParser()
+                     parsed_feature = parser.parse(validation_result.registry.gherkin_specifications)
+                     total_scenarios = len(parsed_feature.scenarios)
+
+                     # Emit started event
+                     start_event = {
+                         "event_type": "evaluation_started",
+                         "procedure_file": str(path),
+                         "total_scenarios": total_scenarios,
+                         "runs_per_scenario": runs,
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                     }
+                     yield f"data: {json.dumps(start_event)}\n\n"
+
+                     # Run evaluation
+                     eval_results = evaluator.evaluate_all(runs=runs, parallel=parallel)
+
+                     # Emit progress/completion events for each scenario
+                     for eval_result in eval_results:
+                         progress_event = {
+                             "event_type": "evaluation_progress",
+                             "scenario_name": eval_result.scenario_name,
+                             "completed_runs": eval_result.total_runs,
+                             "total_runs": eval_result.total_runs,
+                             "timestamp": datetime.utcnow().isoformat() + "Z",
+                         }
+                         yield f"data: {json.dumps(progress_event)}\n\n"
+
+                     # Emit completed event
+                     complete_event = {
+                         "event_type": "evaluation_completed",
+                         "results": [
+                             {
+                                 "scenario_name": r.scenario_name,
+                                 "total_runs": r.total_runs,
+                                 "successful_runs": r.successful_runs,
+                                 "failed_runs": r.failed_runs,
+                                 "success_rate": r.success_rate,
+                                 "consistency_score": r.consistency_score,
+                                 "is_flaky": r.is_flaky,
+                                 "avg_duration": r.avg_duration,
+                                 "std_duration": r.std_duration,
+                             }
+                             for r in eval_results
+                         ],
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                     }
+                     yield f"data: {json.dumps(complete_event)}\n\n"
+
+                     # Cleanup
+                     evaluator.cleanup()
+
+                 except Exception as e:
+                     logger.error(f"Error in evaluation execution: {e}", exc_info=True)
+                     error_event = {
+                         "event_type": "execution",
+                         "lifecycle_stage": "error",
+                         "procedure_id": procedure_id,
+                         "timestamp": datetime.utcnow().isoformat() + "Z",
+                         "details": {"error": str(e)},
+                     }
+                     yield f"data: {json.dumps(error_event)}\n\n"
+
+             return Response(
+                 stream_with_context(generate_events()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "X-Accel-Buffering": "no",
+                     "Connection": "keep-alive",
+                 },
+             )
+
+         except ValueError as e:
+             return jsonify({"error": str(e)}), 400
+         except Exception as e:
+             logger.error(f"Error setting up evaluation execution: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/pydantic-eval/stream", methods=["GET"])
+     def pydantic_eval_stream():
+         """
+         Run Pydantic Evals with SSE streaming output.
+
+         Query params:
+         - path: procedure file path (required)
+         - runs: number of runs per case (optional, default 1)
+         """
1398
+ logger.info(f"Pydantic eval stream request: args={request.args}")
1399
+
1400
+ file_path = request.args.get("path")
1401
+ if not file_path:
1402
+ logger.error("Missing 'path' parameter")
1403
+ return jsonify({"error": "Missing 'path' parameter"}), 400
1404
+
1405
+ runs = int(request.args.get("runs", "1"))
1406
+
1407
+ try:
1408
+ # Resolve path within workspace
1409
+ logger.info(f"Resolving path: {file_path}")
1410
+ path = _resolve_workspace_path(file_path)
1411
+ logger.info(f"Resolved to: {path}")
1412
+
1413
+ if not path.exists():
1414
+ return jsonify({"error": f"File not found: {file_path}"}), 404
1415
+
1416
+ procedure_id = path.stem
1417
+
1418
+ def generate_events():
1419
+ """Generator function that yields SSE evaluation events."""
1420
+ try:
1421
+ from tactus.testing.pydantic_eval_runner import TactusPydanticEvalRunner
1422
+ from tactus.testing.eval_models import (
1423
+ EvaluationConfig,
1424
+ EvalCase,
1425
+ EvaluatorConfig,
1426
+ )
1427
+
1428
+ # Validate and extract evaluations
1429
+ validator = TactusValidator()
1430
+ validation_result = validator.validate_file(str(path))
1431
+
1432
+ if not validation_result.valid:
1433
+ error_event = {
1434
+ "event_type": "execution",
1435
+ "lifecycle_stage": "error",
1436
+ "procedure_id": procedure_id,
1437
+ "timestamp": datetime.utcnow().isoformat() + "Z",
1438
+ "details": {
1439
+ "error": "Validation failed",
1440
+ "errors": [e.message for e in validation_result.errors],
1441
+ },
1442
+ }
1443
+ yield f"data: {json.dumps(error_event)}\n\n"
1444
+ return
1445
+
1446
+ if (
1447
+ not validation_result.registry
1448
+ or not validation_result.registry.pydantic_evaluations
1449
+ ):
1450
+ error_event = {
1451
+ "event_type": "execution",
1452
+ "lifecycle_stage": "error",
1453
+ "procedure_id": procedure_id,
1454
+ "timestamp": datetime.utcnow().isoformat() + "Z",
1455
+ "details": {"error": "No evaluations found in procedure"},
1456
+ }
1457
+ yield f"data: {json.dumps(error_event)}\n\n"
1458
+ return
1459
+
1460
+ # Parse evaluation config FIRST (before start event)
1461
+ eval_dict = validation_result.registry.pydantic_evaluations
1462
+ dataset_cases = [EvalCase(**c) for c in eval_dict.get("dataset", [])]
1463
+ evaluators = [EvaluatorConfig(**e) for e in eval_dict.get("evaluators", [])]
1464
+
1465
+ # Parse thresholds if present
1466
+ from tactus.testing.eval_models import EvaluationThresholds
1467
+
1468
+ thresholds = None
1469
+ if "thresholds" in eval_dict:
1470
+ thresholds = EvaluationThresholds(**eval_dict["thresholds"])
1471
+
1472
+ # Use runs from file if specified, otherwise use query param
1473
+ actual_runs = eval_dict.get("runs", runs)
1474
+
1475
+ # Emit start event (after actual_runs is defined)
1476
+ start_event = {
1477
+ "event_type": "execution",
1478
+ "lifecycle_stage": "started",
1479
+ "procedure_id": procedure_id,
1480
+ "timestamp": datetime.utcnow().isoformat() + "Z",
1481
+ "details": {"type": "pydantic_eval", "runs": actual_runs},
1482
+ }
1483
+ yield f"data: {json.dumps(start_event)}\n\n"
1484
+
1485
+ eval_config = EvaluationConfig(
1486
+ dataset=dataset_cases,
1487
+ evaluators=evaluators,
1488
+ runs=actual_runs,
1489
+ parallel=False, # Sequential for IDE streaming
1490
+ thresholds=thresholds,
1491
+ )
1492
+
1493
+ # Run evaluation
1494
+ runner = TactusPydanticEvalRunner(
1495
+ procedure_file=path,
1496
+ eval_config=eval_config,
1497
+ openai_api_key=os.environ.get("OPENAI_API_KEY"),
1498
+ )
1499
+
1500
+ report = runner.run_evaluation()
1501
+
1502
+ # Emit results
1503
+ result_details = {
1504
+ "type": "pydantic_eval",
1505
+ "total_cases": len(report.cases) if hasattr(report, "cases") else 0,
1506
+ }
1507
+
1508
+ if hasattr(report, "cases"):
1509
+ result_details["cases"] = []
1510
+ for case in report.cases:
1511
+ # Convert case to dict, handling non-serializable objects
1512
+ def make_serializable(obj):
1513
+ """Recursively convert objects to JSON-serializable types."""
1514
+ if isinstance(obj, (str, int, float, bool, type(None))):
1515
+ return obj
1516
+ elif isinstance(obj, dict):
1517
+ return {k: make_serializable(v) for k, v in obj.items()}
1518
+ elif isinstance(obj, (list, tuple)):
1519
+ return [make_serializable(item) for item in obj]
1520
+ elif hasattr(obj, "__dict__"):
1521
+ # Convert object with __dict__ to dict
1522
+ return {
1523
+ k: make_serializable(v)
1524
+ for k, v in obj.__dict__.items()
1525
+ if not k.startswith("_")
1526
+ }
1527
+ else:
1528
+ return str(obj)
1529
+
1530
+ case_dict = {
1531
+ "name": str(case.name),
1532
+ "inputs": make_serializable(case.inputs),
1533
+ "output": make_serializable(case.output),
1534
+ "assertions": make_serializable(case.assertions),
1535
+ "scores": make_serializable(case.scores),
1536
+ "labels": make_serializable(case.labels),
1537
+ "duration": (
1538
+ float(case.task_duration)
1539
+ if hasattr(case, "task_duration")
1540
+ else 0.0
1541
+ ),
1542
+ }
1543
+ result_details["cases"].append(case_dict)
1544
+
1545
+ # Check thresholds
1546
+ passed, violations = runner.check_thresholds(report)
1547
+ result_details["thresholds_passed"] = passed
1548
+ if violations:
1549
+ result_details["threshold_violations"] = violations
1550
+
1551
+ result_event = {
1552
+ "event_type": "execution",
1553
+ "lifecycle_stage": "complete",
1554
+ "procedure_id": procedure_id,
1555
+ "timestamp": datetime.utcnow().isoformat() + "Z",
1556
+ "details": result_details,
1557
+ }
1558
+ yield f"data: {json.dumps(result_event)}\n\n"
1559
+
1560
+ except ImportError as e:
1561
+ error_event = {
1562
+ "event_type": "execution",
1563
+ "lifecycle_stage": "error",
1564
+ "procedure_id": procedure_id,
1565
+ "timestamp": datetime.utcnow().isoformat() + "Z",
1566
+ "details": {"error": f"pydantic_evals not installed: {e}"},
1567
+ }
1568
+ yield f"data: {json.dumps(error_event)}\n\n"
1569
+ except Exception as e:
1570
+ logger.error(f"Error running Pydantic Evals: {e}", exc_info=True)
1571
+ error_event = {
1572
+ "event_type": "execution",
1573
+ "lifecycle_stage": "error",
1574
+ "procedure_id": procedure_id,
1575
+ "timestamp": datetime.utcnow().isoformat() + "Z",
1576
+ "details": {"error": str(e)},
1577
+ }
1578
+ yield f"data: {json.dumps(error_event)}\n\n"
1579
+
1580
+ return Response(
1581
+ stream_with_context(generate_events()),
1582
+ mimetype="text/event-stream",
1583
+ headers={
1584
+ "Cache-Control": "no-cache",
1585
+ "X-Accel-Buffering": "no",
1586
+ "Connection": "keep-alive",
1587
+ },
1588
+ )
1589
+
1590
+ except Exception as e:
1591
+ logger.error(f"Error setting up Pydantic Evals: {e}", exc_info=True)
1592
+ return jsonify({"error": str(e)}), 500
1593
+
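For orientation, a minimal Python sketch of consuming the SSE stream this handler emits, assuming the server's default 127.0.0.1:5001 bind; the route path and procedure file path are placeholders, since the route decorator for this handler appears earlier in the file, and only the `data: <json>` framing is taken from the handler above.

    import json
    import requests

    # Placeholder URL: substitute the eval-stream route registered earlier in server.py
    url = "http://127.0.0.1:5001/api/<eval-stream-route>"
    params = {"path": "examples/my_procedure.tac", "runs": 2}  # hypothetical file

    with requests.get(url, params=params, stream=True) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            # SSE frames arrive as "data: {...}" followed by a blank line
            if line and line.startswith("data: "):
                event = json.loads(line[len("data: "):])
                print(event["lifecycle_stage"], event.get("details", {}))
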
+     @app.route("/api/traces/runs", methods=["GET"])
+     def list_trace_runs():
+         """List all execution runs by grouping checkpoints by run_id."""
+         try:
+             from pathlib import Path as PathLib
+             from tactus.adapters.file_storage import FileStorage
+             from collections import defaultdict
+
+             # Get optional query params
+             procedure = request.args.get("procedure")
+             limit = int(request.args.get("limit", "50"))
+
+             # Create storage backend
+             storage_dir = (
+                 str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                 if WORKSPACE_ROOT
+                 else "~/.tactus/storage"
+             )
+             storage_backend = FileStorage(storage_dir=storage_dir)
+
+             # Without a procedure name there is nothing to group
+             if not procedure:
+                 return jsonify({"runs": []})
+
+             # Load procedure metadata
+             metadata = storage_backend.load_procedure_metadata(procedure)
+
+             # Group checkpoints by run_id
+             runs_dict = defaultdict(list)
+             for checkpoint in metadata.execution_log:
+                 runs_dict[checkpoint.run_id].append(checkpoint)
+
+             # Build runs list
+             runs_data = []
+             for run_id, checkpoints in runs_dict.items():
+                 # Sort checkpoints by position
+                 checkpoints.sort(key=lambda c: c.position)
+
+                 # Get start/end times
+                 start_time = checkpoints[0].timestamp if checkpoints else None
+                 end_time = checkpoints[-1].timestamp if checkpoints else None
+
+                 runs_data.append(
+                     {
+                         "run_id": run_id,
+                         "procedure_name": procedure,
+                         "start_time": start_time.isoformat() if start_time else None,
+                         "end_time": end_time.isoformat() if end_time else None,
+                         "status": "COMPLETED",  # Can be enhanced later
+                         "checkpoint_count": len(checkpoints),
+                     }
+                 )
+
+             # Sort by start_time (most recent first)
+             runs_data.sort(key=lambda r: r["start_time"] or "", reverse=True)
+
+             # Apply limit
+             runs_data = runs_data[:limit]
+
+             return jsonify({"runs": runs_data})
+         except Exception as e:
+             logger.error(f"Error listing trace runs: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
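A hedged usage sketch for this listing endpoint, assuming the default host/port from main() below; the procedure name is hypothetical.

    import requests

    resp = requests.get(
        "http://127.0.0.1:5001/api/traces/runs",
        params={"procedure": "weather_agent", "limit": 10},  # hypothetical procedure
    )
    for run in resp.json()["runs"]:
        print(run["run_id"], run["start_time"], run["checkpoint_count"])
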
+     @app.route("/api/traces/runs/<run_id>", methods=["GET"])
+     def get_trace_run(run_id: str):
+         """Get a specific execution run by filtering checkpoints by run_id."""
+         try:
+             from pathlib import Path as PathLib
+             from tactus.adapters.file_storage import FileStorage
+
+             # Get procedure name from query param (required)
+             procedure = request.args.get("procedure")
+             if not procedure:
+                 return jsonify({"error": "procedure parameter required"}), 400
+
+             # Create storage backend
+             storage_dir = (
+                 str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                 if WORKSPACE_ROOT
+                 else "~/.tactus/storage"
+             )
+             storage_backend = FileStorage(storage_dir=storage_dir)
+
+             # Load procedure metadata
+             metadata = storage_backend.load_procedure_metadata(procedure)
+
+             # Filter checkpoints by run_id
+             run_checkpoints = [cp for cp in metadata.execution_log if cp.run_id == run_id]
+
+             if not run_checkpoints:
+                 return jsonify({"error": f"Run not found: {run_id}"}), 404
+
+             # Sort by position
+             run_checkpoints.sort(key=lambda c: c.position)
+
+             # Get start/end times
+             start_time = run_checkpoints[0].timestamp if run_checkpoints else None
+             end_time = run_checkpoints[-1].timestamp if run_checkpoints else None
+
+             # Convert to API format
+             run_dict = {
+                 "run_id": run_id,
+                 "procedure_name": procedure,
+                 "file_path": "",
+                 "start_time": start_time.isoformat() if start_time else None,
+                 "end_time": end_time.isoformat() if end_time else None,
+                 "status": "COMPLETED",
+                 "execution_log": [
+                     {
+                         "position": cp.position,
+                         "type": cp.type,
+                         "result": cp.result,
+                         "timestamp": cp.timestamp.isoformat() if cp.timestamp else None,
+                         "duration_ms": cp.duration_ms,
+                         "source_location": (
+                             cp.source_location.model_dump() if cp.source_location else None
+                         ),
+                         "captured_vars": cp.captured_vars,
+                     }
+                     for cp in run_checkpoints
+                 ],
+                 "final_state": metadata.state,
+                 "breakpoints": [],
+             }
+
+             return jsonify(run_dict)
+         except Exception as e:
+             logger.error(f"Error getting trace run {run_id}: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/traces/runs/<run_id>/checkpoints", methods=["GET"])
+     def get_run_checkpoints(run_id: str):
+         """Get all checkpoints for a specific run."""
+         try:
+             from pathlib import Path as PathLib
+             from tactus.adapters.file_storage import FileStorage
+
+             # Get procedure name from query param
+             procedure = request.args.get("procedure")
+             if not procedure:
+                 return jsonify({"error": "procedure parameter required"}), 400
+
+             # Create storage backend
+             storage_dir = (
+                 str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                 if WORKSPACE_ROOT
+                 else "~/.tactus/storage"
+             )
+             storage_backend = FileStorage(storage_dir=storage_dir)
+
+             # Load procedure metadata
+             metadata = storage_backend.load_procedure_metadata(procedure)
+
+             # Filter checkpoints by run_id
+             run_checkpoints = [cp for cp in metadata.execution_log if cp.run_id == run_id]
+
+             # Sort by position
+             run_checkpoints.sort(key=lambda c: c.position)
+
+             # Convert to dict format
+             checkpoints_dict = [
+                 {
+                     "run_id": cp.run_id,
+                     "position": cp.position,
+                     "name": cp.type,  # Use 'type' field as the name (e.g., "agent_turn")
+                     "timestamp": cp.timestamp.isoformat() if cp.timestamp else None,
+                     "source_location": (
+                         {
+                             "file": cp.source_location.file,
+                             "line": cp.source_location.line,
+                         }
+                         if cp.source_location
+                         else None
+                     ),
+                     "data": getattr(cp, "data", None),  # Not all checkpoints have 'data'
+                 }
+                 for cp in run_checkpoints
+             ]
+
+             return jsonify({"checkpoints": checkpoints_dict})
+         except Exception as e:
+             logger.error(f"Error getting checkpoints for run {run_id}: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/traces/runs/<run_id>/checkpoints/<int:position>", methods=["GET"])
+     def get_checkpoint(run_id: str, position: int):
+         """Get a specific checkpoint from a run by filtering by run_id."""
+         try:
+             from pathlib import Path as PathLib
+             from tactus.adapters.file_storage import FileStorage
+
+             # Get procedure name from query param
+             procedure = request.args.get("procedure")
+             if not procedure:
+                 return jsonify({"error": "procedure parameter required"}), 400
+
+             # Create storage backend
+             storage_dir = (
+                 str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                 if WORKSPACE_ROOT
+                 else "~/.tactus/storage"
+             )
+             storage_backend = FileStorage(storage_dir=storage_dir)
+
+             # Load procedure metadata
+             metadata = storage_backend.load_procedure_metadata(procedure)
+
+             # Find checkpoint by run_id and position
+             checkpoint = next(
+                 (
+                     cp
+                     for cp in metadata.execution_log
+                     if cp.run_id == run_id and cp.position == position
+                 ),
+                 None,
+             )
+
+             if not checkpoint:
+                 return (
+                     jsonify({"error": f"Checkpoint position {position} not found in run {run_id}"}),
+                     404,
+                 )
+
+             # Convert to API format
+             cp_dict = {
+                 "position": checkpoint.position,
+                 "type": checkpoint.type,
+                 "result": checkpoint.result,
+                 "timestamp": checkpoint.timestamp.isoformat() if checkpoint.timestamp else None,
+                 "duration_ms": checkpoint.duration_ms,
+                 "source_location": (
+                     checkpoint.source_location.model_dump() if checkpoint.source_location else None
+                 ),
+                 "captured_vars": checkpoint.captured_vars,
+             }
+
+             return jsonify(cp_dict)
+         except Exception as e:
+             logger.error(f"Error getting checkpoint {run_id}@{position}: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
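The trace endpoints above compose naturally: list runs, pick one, then fetch a checkpoint by position. A sketch under the same default-host assumption, with a hypothetical procedure name:

    import requests

    BASE = "http://127.0.0.1:5001"
    PROC = "weather_agent"  # hypothetical procedure name

    runs = requests.get(f"{BASE}/api/traces/runs", params={"procedure": PROC}).json()["runs"]
    if runs:
        run_id = runs[0]["run_id"]
        run = requests.get(f"{BASE}/api/traces/runs/{run_id}", params={"procedure": PROC}).json()
        if run["execution_log"]:
            pos = run["execution_log"][0]["position"]
            cp = requests.get(
                f"{BASE}/api/traces/runs/{run_id}/checkpoints/{pos}",
                params={"procedure": PROC},
            ).json()
            print(cp["type"], cp["duration_ms"])
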
+     @app.route("/api/traces/runs/<run_id>/statistics", methods=["GET"])
+     def get_run_statistics(run_id: str):
+         """Get statistics for a run by filtering checkpoints by run_id."""
+         try:
+             from pathlib import Path as PathLib
+             from tactus.adapters.file_storage import FileStorage
+             from collections import Counter
+
+             # Get procedure name from query param
+             procedure = request.args.get("procedure")
+             if not procedure:
+                 return jsonify({"error": "procedure parameter required"}), 400
+
+             # Create storage backend
+             storage_dir = (
+                 str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                 if WORKSPACE_ROOT
+                 else "~/.tactus/storage"
+             )
+             storage_backend = FileStorage(storage_dir=storage_dir)
+
+             # Load procedure metadata
+             metadata = storage_backend.load_procedure_metadata(procedure)
+
+             # Filter checkpoints by run_id
+             run_checkpoints = [cp for cp in metadata.execution_log if cp.run_id == run_id]
+
+             if not run_checkpoints:
+                 return jsonify({"error": f"Run not found: {run_id}"}), 404
+
+             # Calculate statistics
+             checkpoint_types = Counter(cp.type for cp in run_checkpoints)
+             total_duration = sum(cp.duration_ms or 0 for cp in run_checkpoints)
+             has_source_locations = sum(1 for cp in run_checkpoints if cp.source_location)
+
+             stats = {
+                 "run_id": run_id,
+                 "procedure": procedure,
+                 "status": "COMPLETED",
+                 "total_checkpoints": len(run_checkpoints),
+                 "checkpoints_by_type": dict(checkpoint_types),
+                 "total_duration_ms": total_duration,
+                 "has_source_locations": has_source_locations,
+             }
+
+             return jsonify(stats)
+         except Exception as e:
+             logger.error(f"Error getting statistics for {run_id}: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/traces/runs/<run_id>/events", methods=["GET"])
+     def get_run_events(run_id: str):
+         """Get all SSE events for a specific run."""
+         try:
+             from pathlib import Path as PathLib
+
+             # Determine storage directory
+             storage_dir = (
+                 str(PathLib(WORKSPACE_ROOT) / ".tac" / "storage")
+                 if WORKSPACE_ROOT
+                 else "~/.tactus/storage"
+             )
+             # expanduser() so the "~" fallback resolves when no workspace is set
+             events_dir = PathLib(storage_dir).expanduser() / "events"
+             events_file = events_dir / f"{run_id}.json"
+
+             if not events_file.exists():
+                 return jsonify({"error": f"Events not found for run {run_id}"}), 404
+
+             # Load events from file
+             with open(events_file, "r") as f:
+                 events = json.load(f)
+
+             return jsonify({"events": events})
+         except Exception as e:
+             logger.error(f"Error getting events for {run_id}: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
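This handler assumes events are persisted one file per run under <storage_dir>/events/<run_id>.json. A sketch of reading such a file directly, with hypothetical paths:

    import json
    from pathlib import Path

    # Hypothetical workspace root and run id
    storage = Path("/path/to/workspace") / ".tac" / "storage"
    events_file = storage / "events" / "some-run-id.json"
    if events_file.exists():
        events = json.loads(events_file.read_text())
        print(f"{len(events)} events recorded")
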
+     # Coding Assistant - persistent agent instance per session
+     coding_assistant = None
+
+     def get_or_create_assistant():
+         """Get or create the coding assistant instance."""
+         nonlocal coding_assistant
+         if coding_assistant is None and WORKSPACE_ROOT:
+             try:
+                 from tactus.ide.coding_assistant import CodingAssistantAgent
+                 from tactus.core.config_manager import ConfigManager
+
+                 # Load configuration; the IDE has no procedure file,
+                 # so start from environment config
+                 config_manager = ConfigManager()
+                 config = config_manager._load_from_environment()
+
+                 # Try to load user config
+                 for user_path in config_manager._get_user_config_paths():
+                     if user_path.exists():
+                         user_config = config_manager._load_yaml_file(user_path)
+                         if user_config:
+                             config = config_manager._deep_merge(config, user_config)
+                         break
+
+                 coding_assistant = CodingAssistantAgent(WORKSPACE_ROOT, config)
+                 logger.info("Coding assistant initialized")
+             except Exception as e:
+                 logger.error(f"Failed to initialize coding assistant: {e}", exc_info=True)
+                 raise
+         return coding_assistant
+
+     @app.route("/api/chat", methods=["POST"])
+     def chat_message():
+         """Handle chat messages from the user."""
+         try:
+             data = request.json or {}
+             message = data.get("message")
+
+             if not message:
+                 return jsonify({"error": "Missing 'message' parameter"}), 400
+
+             if not WORKSPACE_ROOT:
+                 return jsonify({"error": "No workspace folder selected"}), 400
+
+             # Get or create assistant
+             assistant = get_or_create_assistant()
+
+             # Process message
+             result = assistant.process_message(message)
+
+             return jsonify(
+                 {
+                     "success": True,
+                     "response": result["response"],
+                     "tool_calls": result.get("tool_calls", []),
+                 }
+             )
+
+         except Exception as e:
+             logger.error(f"Error handling chat message: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
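A hedged example of calling the non-streaming chat endpoint; the message text is illustrative:

    import requests

    resp = requests.post(
        "http://127.0.0.1:5001/api/chat",
        json={"message": "Explain what this procedure does"},
    )
    body = resp.json()
    print(body["response"])
    for call in body.get("tool_calls", []):
        print("tool call:", call)
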
+     @app.route("/api/chat/stream", methods=["POST"])
+     def chat_stream():
+         """
+         Stream chat responses over SSE via the standalone assistant service.
+
+         Request body:
+             - workspace_root: Workspace path
+             - message: User's message
+             - config: Optional config with provider, model, etc.
+         """
+         try:
+             import sys
+             import os
+             import uuid
+             import asyncio
+
+             # Add the IDE backend directory to the path so its modules can be imported
+             backend_dir = os.path.join(
+                 os.path.dirname(__file__), "..", "..", "tactus-ide", "backend"
+             )
+             if backend_dir not in sys.path:
+                 sys.path.insert(0, backend_dir)
+
+             from assistant_service import AssistantService
+
+             data = request.json or {}
+             workspace_root = data.get("workspace_root") or WORKSPACE_ROOT
+             user_message = data.get("message")
+             config = data.get(
+                 "config",
+                 {"provider": "openai", "model": "gpt-4o", "temperature": 0.7, "max_tokens": 4000},
+             )
+
+             if not workspace_root or not user_message:
+                 return jsonify({"error": "workspace_root and message required"}), 400
+
+             # Create service instance
+             conversation_id = str(uuid.uuid4())
+             service = AssistantService(workspace_root, config)
+
+             def generate():
+                 """Generator function that yields SSE events."""
+                 loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(loop)
+
+                 try:
+                     # Start conversation (configures DSPy LM internally)
+                     loop.run_until_complete(service.start_conversation(conversation_id))
+
+                     # Send immediate thinking indicator
+                     yield f"data: {json.dumps({'type': 'thinking', 'content': 'Processing your request...'})}\n\n"
+
+                     # Create async generator
+                     async_gen = service.send_message(user_message)
+
+                     # Consume events one at a time and yield immediately
+                     while True:
+                         try:
+                             event = loop.run_until_complete(async_gen.__anext__())
+                             yield f"data: {json.dumps(event)}\n\n"
+                         except StopAsyncIteration:
+                             break
+
+                 except Exception as e:
+                     logger.error(f"Error streaming message: {e}", exc_info=True)
+                     yield f"data: {json.dumps({'type': 'error', 'error': str(e)})}\n\n"
+                 finally:
+                     loop.close()
+
+             return Response(
+                 stream_with_context(generate()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "X-Accel-Buffering": "no",
+                     "Connection": "keep-alive",
+                 },
+             )
+
+         except Exception as e:
+             logger.error(f"Error in stream endpoint: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
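A sketch of a streaming client for this endpoint; `workspace_root` is omitted so the server falls back to its own WORKSPACE_ROOT, and the message and model settings are illustrative:

    import json
    import requests

    payload = {
        "message": "Add error handling to my procedure",  # illustrative
        "config": {"provider": "openai", "model": "gpt-4o", "temperature": 0.7, "max_tokens": 4000},
    }
    with requests.post(
        "http://127.0.0.1:5001/api/chat/stream", json=payload, stream=True
    ) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            if line and line.startswith("data: "):
                event = json.loads(line[len("data: "):])
                print(event.get("type"), event.get("content") or event.get("error", ""))
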
+     @app.route("/api/chat/reset", methods=["POST"])
+     def chat_reset():
+         """Reset the chat conversation."""
+         try:
+             assistant = get_or_create_assistant()
+             if assistant:
+                 assistant.reset_conversation()
+                 return jsonify({"success": True})
+             return jsonify({"error": "Assistant not initialized"}), 400
+         except Exception as e:
+             logger.error(f"Error resetting chat: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
+     @app.route("/api/chat/tools", methods=["GET"])
+     def chat_tools():
+         """Get available tools for the coding assistant."""
+         try:
+             assistant = get_or_create_assistant()
+             if assistant:
+                 tools = assistant.get_available_tools()
+                 return jsonify({"tools": tools})
+             return jsonify({"error": "Assistant not initialized"}), 400
+         except Exception as e:
+             logger.error(f"Error getting tools: {e}", exc_info=True)
+             return jsonify({"error": str(e)}), 500
+
2083
+ @app.route("/api/lsp", methods=["POST"])
2084
+ def lsp_request():
2085
+ """Handle LSP requests via HTTP."""
2086
+ try:
2087
+ message = request.json
2088
+ logger.debug(f"Received LSP message: {message.get('method')}")
2089
+ response = lsp_server.handle_message(message)
2090
+
2091
+ if response:
2092
+ return jsonify(response)
2093
+ return jsonify({"jsonrpc": "2.0", "id": message.get("id"), "result": None})
2094
+ except Exception as e:
2095
+ logger.error(f"Error handling LSP message: {e}")
2096
+ return (
2097
+ jsonify(
2098
+ {
2099
+ "jsonrpc": "2.0",
2100
+ "id": message.get("id"),
2101
+ "error": {"code": -32603, "message": str(e)},
2102
+ }
2103
+ ),
2104
+ 500,
2105
+ )
2106
+
2107
+ @app.route("/api/lsp/notification", methods=["POST"])
2108
+ def lsp_notification():
2109
+ """Handle LSP notifications via HTTP and return diagnostics."""
2110
+ try:
2111
+ message = request.json
2112
+ method = message.get("method")
2113
+ params = message.get("params", {})
2114
+
2115
+ logger.debug(f"Received LSP notification: {method}")
2116
+
2117
+ # Handle notifications that produce diagnostics
2118
+ diagnostics = []
2119
+ if method == "textDocument/didOpen":
2120
+ text_document = params.get("textDocument", {})
2121
+ uri = text_document.get("uri")
2122
+ text = text_document.get("text")
2123
+ if uri and text:
2124
+ diagnostics = lsp_server.handler.validate_document(uri, text)
2125
+ elif method == "textDocument/didChange":
2126
+ text_document = params.get("textDocument", {})
2127
+ content_changes = params.get("contentChanges", [])
2128
+ uri = text_document.get("uri")
2129
+ if uri and content_changes:
2130
+ text = content_changes[0].get("text") if content_changes else None
2131
+ if text:
2132
+ diagnostics = lsp_server.handler.validate_document(uri, text)
2133
+ elif method == "textDocument/didClose":
2134
+ text_document = params.get("textDocument", {})
2135
+ uri = text_document.get("uri")
2136
+ if uri:
2137
+ lsp_server.handler.close_document(uri)
2138
+
2139
+ # Return diagnostics if any
2140
+ if diagnostics:
2141
+ return jsonify({"status": "ok", "diagnostics": diagnostics})
2142
+
2143
+ return jsonify({"status": "ok"})
2144
+ except Exception as e:
2145
+ logger.error(f"Error handling LSP notification: {e}")
2146
+ return jsonify({"error": str(e)}), 500
2147
+
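A hedged example of posting a didOpen notification to this endpoint and reading back diagnostics; the URI and document text are hypothetical:

    import requests

    notification = {
        "jsonrpc": "2.0",
        "method": "textDocument/didOpen",
        "params": {
            "textDocument": {
                "uri": "file:///workspace/example.tac",  # hypothetical URI
                "text": "-- procedure source here",      # illustrative content
            }
        },
    }
    resp = requests.post("http://127.0.0.1:5001/api/lsp/notification", json=notification)
    print(resp.json().get("diagnostics", []))
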
2148
+ # Register config API routes
2149
+ try:
2150
+ import sys
2151
+
2152
+ # Add tactus-ide/backend to path for imports
2153
+ # Path from tactus/ide/server.py -> project root -> tactus-ide/backend
2154
+ backend_dir = Path(__file__).parent.parent.parent / "tactus-ide" / "backend"
2155
+ if backend_dir.exists():
2156
+ sys.path.insert(0, str(backend_dir))
2157
+ from config_server import register_config_routes
2158
+
2159
+ register_config_routes(app)
2160
+ else:
2161
+ logger.warning(f"Config server backend directory not found: {backend_dir}")
2162
+ except ImportError as e:
2163
+ logger.warning(f"Could not register config routes: {e}")
2164
+
2165
+ # Serve frontend if dist directory is provided
2166
+ if frontend_dist_dir:
2167
+
2168
+ @app.route("/")
2169
+ def serve_frontend():
2170
+ """Serve the frontend index.html."""
2171
+ return app.send_static_file("index.html")
2172
+
2173
+ @app.route("/<path:path>")
2174
+ def serve_static_or_frontend(path):
2175
+ """Serve static files or index.html for client-side routing."""
2176
+ # If the file exists, serve it
2177
+ file_path = Path(frontend_dist_dir) / path
2178
+ if file_path.exists() and file_path.is_file():
2179
+ return app.send_static_file(path)
2180
+ # Otherwise, serve index.html for client-side routing (unless it's an API call)
2181
+ if not path.startswith("api/"):
2182
+ return app.send_static_file("index.html")
2183
+ # For API calls that don't match any route, return 404
2184
+ return jsonify({"error": "Not found"}), 404
2185
+
2186
+ return app
2187
+
2188
+
2189
+ def main() -> None:
2190
+ """
2191
+ Run the IDE backend server.
2192
+
2193
+ This enables `python -m tactus.ide.server` which is useful for local development
2194
+ and file-watcher based auto-reload workflows.
2195
+
2196
+ Environment variables:
2197
+ - TACTUS_IDE_HOST: Host to bind to (default: 127.0.0.1)
2198
+ - TACTUS_IDE_PORT: Port to bind to (default: 5001)
2199
+ - TACTUS_IDE_WORKSPACE: Initial workspace directory (default: current directory)
2200
+ - TACTUS_IDE_LOG_LEVEL: Logging level (default: INFO)
2201
+ """
2202
+ logging.basicConfig(level=os.environ.get("TACTUS_IDE_LOG_LEVEL", "INFO"))
2203
+
2204
+ host = os.environ.get("TACTUS_IDE_HOST", "127.0.0.1")
2205
+ port_str = os.environ.get("TACTUS_IDE_PORT", "5001")
2206
+ try:
2207
+ port = int(port_str)
2208
+ except ValueError:
2209
+ raise SystemExit(f"Invalid TACTUS_IDE_PORT: {port_str!r}")
2210
+
2211
+ # Get initial workspace from environment or use current directory
2212
+ initial_workspace = os.environ.get("TACTUS_IDE_WORKSPACE")
2213
+ if initial_workspace:
2214
+ logger.info(f"Setting initial workspace to: {initial_workspace}")
2215
+
2216
+ app = create_app(initial_workspace=initial_workspace)
2217
+ # NOTE: We intentionally disable Flask's reloader here; external watchers (e.g. watchdog)
2218
+ # should restart this process to avoid double-fork behavior.
2219
+ app.run(host=host, port=port, debug=False, threaded=True, use_reloader=False)
2220
+
2221
+
2222
+ if __name__ == "__main__":
2223
+ main()
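Given the environment-driven configuration above, a minimal programmatic launcher sketch (values illustrative):

    import os

    # Illustrative values; defaults are 127.0.0.1:5001 and the current directory
    os.environ.setdefault("TACTUS_IDE_PORT", "5050")
    os.environ.setdefault("TACTUS_IDE_WORKSPACE", "/path/to/workspace")

    from tactus.ide.server import main

    main()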