tactus-0.31.2-py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (160)
  1. tactus/__init__.py +49 -0
  2. tactus/adapters/__init__.py +9 -0
  3. tactus/adapters/broker_log.py +76 -0
  4. tactus/adapters/cli_hitl.py +189 -0
  5. tactus/adapters/cli_log.py +223 -0
  6. tactus/adapters/cost_collector_log.py +56 -0
  7. tactus/adapters/file_storage.py +367 -0
  8. tactus/adapters/http_callback_log.py +109 -0
  9. tactus/adapters/ide_log.py +71 -0
  10. tactus/adapters/lua_tools.py +336 -0
  11. tactus/adapters/mcp.py +289 -0
  12. tactus/adapters/mcp_manager.py +196 -0
  13. tactus/adapters/memory.py +53 -0
  14. tactus/adapters/plugins.py +419 -0
  15. tactus/backends/http_backend.py +58 -0
  16. tactus/backends/model_backend.py +35 -0
  17. tactus/backends/pytorch_backend.py +110 -0
  18. tactus/broker/__init__.py +12 -0
  19. tactus/broker/client.py +247 -0
  20. tactus/broker/protocol.py +183 -0
  21. tactus/broker/server.py +1123 -0
  22. tactus/broker/stdio.py +12 -0
  23. tactus/cli/__init__.py +7 -0
  24. tactus/cli/app.py +2245 -0
  25. tactus/cli/commands/__init__.py +0 -0
  26. tactus/core/__init__.py +32 -0
  27. tactus/core/config_manager.py +790 -0
  28. tactus/core/dependencies/__init__.py +14 -0
  29. tactus/core/dependencies/registry.py +180 -0
  30. tactus/core/dsl_stubs.py +2117 -0
  31. tactus/core/exceptions.py +66 -0
  32. tactus/core/execution_context.py +480 -0
  33. tactus/core/lua_sandbox.py +508 -0
  34. tactus/core/message_history_manager.py +236 -0
  35. tactus/core/mocking.py +286 -0
  36. tactus/core/output_validator.py +291 -0
  37. tactus/core/registry.py +499 -0
  38. tactus/core/runtime.py +2907 -0
  39. tactus/core/template_resolver.py +142 -0
  40. tactus/core/yaml_parser.py +301 -0
  41. tactus/docker/Dockerfile +61 -0
  42. tactus/docker/entrypoint.sh +69 -0
  43. tactus/dspy/__init__.py +39 -0
  44. tactus/dspy/agent.py +1144 -0
  45. tactus/dspy/broker_lm.py +181 -0
  46. tactus/dspy/config.py +212 -0
  47. tactus/dspy/history.py +196 -0
  48. tactus/dspy/module.py +405 -0
  49. tactus/dspy/prediction.py +318 -0
  50. tactus/dspy/signature.py +185 -0
  51. tactus/formatting/__init__.py +7 -0
  52. tactus/formatting/formatter.py +437 -0
  53. tactus/ide/__init__.py +9 -0
  54. tactus/ide/coding_assistant.py +343 -0
  55. tactus/ide/server.py +2223 -0
  56. tactus/primitives/__init__.py +49 -0
  57. tactus/primitives/control.py +168 -0
  58. tactus/primitives/file.py +229 -0
  59. tactus/primitives/handles.py +378 -0
  60. tactus/primitives/host.py +94 -0
  61. tactus/primitives/human.py +342 -0
  62. tactus/primitives/json.py +189 -0
  63. tactus/primitives/log.py +187 -0
  64. tactus/primitives/message_history.py +157 -0
  65. tactus/primitives/model.py +163 -0
  66. tactus/primitives/procedure.py +564 -0
  67. tactus/primitives/procedure_callable.py +318 -0
  68. tactus/primitives/retry.py +155 -0
  69. tactus/primitives/session.py +152 -0
  70. tactus/primitives/state.py +182 -0
  71. tactus/primitives/step.py +209 -0
  72. tactus/primitives/system.py +93 -0
  73. tactus/primitives/tool.py +375 -0
  74. tactus/primitives/tool_handle.py +279 -0
  75. tactus/primitives/toolset.py +229 -0
  76. tactus/protocols/__init__.py +38 -0
  77. tactus/protocols/chat_recorder.py +81 -0
  78. tactus/protocols/config.py +97 -0
  79. tactus/protocols/cost.py +31 -0
  80. tactus/protocols/hitl.py +71 -0
  81. tactus/protocols/log_handler.py +27 -0
  82. tactus/protocols/models.py +355 -0
  83. tactus/protocols/result.py +33 -0
  84. tactus/protocols/storage.py +90 -0
  85. tactus/providers/__init__.py +13 -0
  86. tactus/providers/base.py +92 -0
  87. tactus/providers/bedrock.py +117 -0
  88. tactus/providers/google.py +105 -0
  89. tactus/providers/openai.py +98 -0
  90. tactus/sandbox/__init__.py +63 -0
  91. tactus/sandbox/config.py +171 -0
  92. tactus/sandbox/container_runner.py +1099 -0
  93. tactus/sandbox/docker_manager.py +433 -0
  94. tactus/sandbox/entrypoint.py +227 -0
  95. tactus/sandbox/protocol.py +213 -0
  96. tactus/stdlib/__init__.py +10 -0
  97. tactus/stdlib/io/__init__.py +13 -0
  98. tactus/stdlib/io/csv.py +88 -0
  99. tactus/stdlib/io/excel.py +136 -0
  100. tactus/stdlib/io/file.py +90 -0
  101. tactus/stdlib/io/fs.py +154 -0
  102. tactus/stdlib/io/hdf5.py +121 -0
  103. tactus/stdlib/io/json.py +109 -0
  104. tactus/stdlib/io/parquet.py +83 -0
  105. tactus/stdlib/io/tsv.py +88 -0
  106. tactus/stdlib/loader.py +274 -0
  107. tactus/stdlib/tac/tactus/tools/done.tac +33 -0
  108. tactus/stdlib/tac/tactus/tools/log.tac +50 -0
  109. tactus/testing/README.md +273 -0
  110. tactus/testing/__init__.py +61 -0
  111. tactus/testing/behave_integration.py +380 -0
  112. tactus/testing/context.py +486 -0
  113. tactus/testing/eval_models.py +114 -0
  114. tactus/testing/evaluation_runner.py +222 -0
  115. tactus/testing/evaluators.py +634 -0
  116. tactus/testing/events.py +94 -0
  117. tactus/testing/gherkin_parser.py +134 -0
  118. tactus/testing/mock_agent.py +315 -0
  119. tactus/testing/mock_dependencies.py +234 -0
  120. tactus/testing/mock_hitl.py +171 -0
  121. tactus/testing/mock_registry.py +168 -0
  122. tactus/testing/mock_tools.py +133 -0
  123. tactus/testing/models.py +115 -0
  124. tactus/testing/pydantic_eval_runner.py +508 -0
  125. tactus/testing/steps/__init__.py +13 -0
  126. tactus/testing/steps/builtin.py +902 -0
  127. tactus/testing/steps/custom.py +69 -0
  128. tactus/testing/steps/registry.py +68 -0
  129. tactus/testing/test_runner.py +489 -0
  130. tactus/tracing/__init__.py +5 -0
  131. tactus/tracing/trace_manager.py +417 -0
  132. tactus/utils/__init__.py +1 -0
  133. tactus/utils/cost_calculator.py +72 -0
  134. tactus/utils/model_pricing.py +132 -0
  135. tactus/utils/safe_file_library.py +502 -0
  136. tactus/utils/safe_libraries.py +234 -0
  137. tactus/validation/LuaLexerBase.py +66 -0
  138. tactus/validation/LuaParserBase.py +23 -0
  139. tactus/validation/README.md +224 -0
  140. tactus/validation/__init__.py +7 -0
  141. tactus/validation/error_listener.py +21 -0
  142. tactus/validation/generated/LuaLexer.interp +231 -0
  143. tactus/validation/generated/LuaLexer.py +5548 -0
  144. tactus/validation/generated/LuaLexer.tokens +124 -0
  145. tactus/validation/generated/LuaLexerBase.py +66 -0
  146. tactus/validation/generated/LuaParser.interp +173 -0
  147. tactus/validation/generated/LuaParser.py +6439 -0
  148. tactus/validation/generated/LuaParser.tokens +124 -0
  149. tactus/validation/generated/LuaParserBase.py +23 -0
  150. tactus/validation/generated/LuaParserVisitor.py +118 -0
  151. tactus/validation/generated/__init__.py +7 -0
  152. tactus/validation/grammar/LuaLexer.g4 +123 -0
  153. tactus/validation/grammar/LuaParser.g4 +178 -0
  154. tactus/validation/semantic_visitor.py +817 -0
  155. tactus/validation/validator.py +157 -0
  156. tactus-0.31.2.dist-info/METADATA +1809 -0
  157. tactus-0.31.2.dist-info/RECORD +160 -0
  158. tactus-0.31.2.dist-info/WHEEL +4 -0
  159. tactus-0.31.2.dist-info/entry_points.txt +2 -0
  160. tactus-0.31.2.dist-info/licenses/LICENSE +21 -0
tactus/testing/steps/custom.py
@@ -0,0 +1,69 @@
+ """
+ Custom step manager for user-defined Lua step functions.
+ """
+
+ import logging
+ from typing import Any, Dict
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class CustomStepManager:
+     """
+     Manages custom Lua step definitions.
+
+     Allows users to define custom steps in their procedure files
+     using the step() function with Lua implementations.
+     """
+
+     def __init__(self, lua_sandbox=None):
+         self.lua_sandbox = lua_sandbox
+         self.custom_steps: Dict[str, Any] = {}
+
+     def register_from_lua(self, step_text: str, lua_function: Any) -> None:
+         """
+         Register a custom step from Lua code.
+
+         Args:
+             step_text: The step text pattern (exact match)
+             lua_function: Lua function reference to execute
+         """
+         self.custom_steps[step_text] = lua_function
+         logger.debug(f"Registered custom step: {step_text}")
+
+     def execute(self, step_text: str, context: Any) -> bool:
+         """
+         Execute custom Lua step if it exists.
+
+         Args:
+             step_text: The step text to match
+             context: Test context object
+
+         Returns:
+             True if step was found and executed, False otherwise
+         """
+         if step_text in self.custom_steps:
+             lua_func = self.custom_steps[step_text]
+             try:
+                 # Call Lua function with context
+                 # The Lua function should perform assertions
+                 lua_func(context)
+                 return True
+             except Exception as e:
+                 logger.error(f"Custom step '{step_text}' failed: {e}")
+                 raise AssertionError(f"Custom step failed: {e}")
+
+         return False
+
+     def has_step(self, step_text: str) -> bool:
+         """Check if custom step exists."""
+         return step_text in self.custom_steps
+
+     def get_all_steps(self) -> list[str]:
+         """Get all registered custom step texts."""
+         return list(self.custom_steps.keys())
+
+     def clear(self) -> None:
+         """Clear all custom steps."""
+         self.custom_steps.clear()
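A minimal usage sketch (not from the package) may help orient readers: register_from_lua() stores any callable keyed by exact step text, and execute() invokes it with the test context. A plain Python function stands in here for the Lua function reference (lupa exposes Lua functions to Python as ordinary callables), and FakeContext is a hypothetical stand-in for the Behave context object.

from tactus.testing.steps.custom import CustomStepManager

manager = CustomStepManager()

# Any callable works; a real run would pass a Lua function reference.
def assert_greeting(context):
    assert getattr(context, "greeting", None) == "hello"

manager.register_from_lua("the greeting is hello", assert_greeting)

class FakeContext:  # hypothetical stand-in for the Behave test context
    greeting = "hello"

assert manager.has_step("the greeting is hello")
assert manager.execute("the greeting is hello", FakeContext()) is True
assert manager.execute("an unregistered step", FakeContext()) is False

Note that a failing assertion inside the callable is re-raised as AssertionError, so custom steps report failures the same way built-in steps do.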
tactus/testing/steps/registry.py
@@ -0,0 +1,68 @@
+ """
+ Step registry for pattern matching and execution.
+ """
+
+ import re
+ import logging
+ from typing import Callable, Dict, Optional, Pattern, Tuple
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class StepRegistry:
+     """
+     Registry of step definitions with regex pattern matching.
+
+     Matches step text against registered patterns and executes
+     the corresponding step functions.
+     """
+
+     def __init__(self):
+         self._steps: Dict[Pattern, Callable] = {}
+         self._step_patterns: Dict[str, Pattern] = {}  # pattern_str -> compiled pattern
+
+     def register(self, pattern: str, func: Callable, step_type: str = "any") -> None:
+         """
+         Register a step with regex pattern.
+
+         Args:
+             pattern: Regex pattern to match step text
+             func: Function to execute when pattern matches
+             step_type: Type of step (given, when, then, any)
+         """
+         try:
+             compiled = re.compile(pattern, re.IGNORECASE)
+             self._steps[compiled] = func
+             self._step_patterns[pattern] = compiled
+             logger.debug(f"Registered step pattern: {pattern}")
+         except re.error as e:
+             logger.error(f"Invalid regex pattern '{pattern}': {e}")
+             raise ValueError(f"Invalid step pattern: {e}")
+
+     def match(self, step_text: str) -> Optional[Tuple[Callable, dict]]:
+         """
+         Find matching step function for given step text.
+
+         Args:
+             step_text: The step text to match
+
+         Returns:
+             Tuple of (function, match_groups) or None if no match
+         """
+         for pattern, func in self._steps.items():
+             match = pattern.match(step_text)
+             if match:
+                 # Return function and captured groups as dict
+                 return func, match.groupdict()
+
+         return None
+
+     def get_all_patterns(self) -> list[str]:
+         """Get all registered pattern strings."""
+         return list(self._step_patterns.keys())
+
+     def clear(self) -> None:
+         """Clear all registered steps."""
+         self._steps.clear()
+         self._step_patterns.clear()
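For contrast with the exact-match custom steps above, a short sketch of the registry's matching contract, using an illustrative step function: patterns compile with re.IGNORECASE, matching is anchored at the start of the step text via Pattern.match(), and named capture groups come back as the dict returned alongside the function.

from tactus.testing.steps.registry import StepRegistry

registry = StepRegistry()

def check_output(context, value=None):  # illustrative step function
    print(f"expected output: {value}")

# Named groups in the pattern become keys in the returned dict.
registry.register(r'the output contains "(?P<value>[^"]+)"', check_output)

matched = registry.match('The output contains "42"')  # IGNORECASE applies
assert matched is not None
func, groups = matched
assert groups == {"value": "42"}
func(None, **groups)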
tactus/testing/test_runner.py
@@ -0,0 +1,489 @@
+ """
+ Test runner for Tactus BDD testing.
+
+ Runs tests with parallel scenario execution using multiprocessing.
+ """
+
+ import importlib.util
+ import logging
+ import os
+ import sys
+ import subprocess
+ import multiprocessing
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ from .models import (
+     ParsedFeature,
+     ScenarioResult,
+     StepResult,
+     FeatureResult,
+     TestResult,
+ )
+ from .gherkin_parser import GherkinParser
+ from .behave_integration import setup_behave_directory
+ from .steps.registry import StepRegistry
+ from .steps.builtin import register_builtin_steps
+ from .steps.custom import CustomStepManager
+
+ BEHAVE_AVAILABLE = importlib.util.find_spec("behave") is not None
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class TactusTestRunner:
+     """
+     Runs Tactus BDD tests with parallel scenario execution.
+
+     Parses Gherkin specifications, generates Behave files,
+     and executes scenarios in parallel for performance.
+     """
+
+     def __init__(
+         self,
+         procedure_file: Path,
+         mock_tools: Optional[Dict] = None,
+         params: Optional[Dict] = None,
+         mcp_servers: Optional[Dict] = None,
+         tool_paths: Optional[List[str]] = None,
+         mocked: bool = False,
+     ):
+         if not BEHAVE_AVAILABLE:
+             raise ImportError("behave library not installed. Install with: pip install behave")
+
+         self.procedure_file = procedure_file
+         self.mock_tools = mock_tools or {}
+         self.params = params or {}
+         self.mcp_servers = mcp_servers or {}
+         self.tool_paths = tool_paths or []
+         self.mocked = mocked  # Whether to use mocked dependencies
+         self.work_dir: Optional[Path] = None
+         self.parsed_feature: Optional[ParsedFeature] = None
+         self.step_registry = StepRegistry()
+         self.custom_steps = CustomStepManager()
+         self.generated_step_file: Optional[Path] = None
+
+         # Register built-in steps
+         register_builtin_steps(self.step_registry)
+
+     def setup(self, gherkin_text: str) -> None:
+         """
+         Setup test environment from Gherkin text.
+
+         Args:
+             gherkin_text: Raw Gherkin feature text
+         """
+         # Parse Gherkin
+         parser = GherkinParser()
+         self.parsed_feature = parser.parse(gherkin_text)
+
+         # Setup Behave directory with mock tools, params, and mocked flag
+         self.work_dir = setup_behave_directory(
+             self.parsed_feature,
+             self.step_registry,
+             self.custom_steps,
+             self.procedure_file,
+             mock_tools=self.mock_tools,
+             params=self.params,
+             mcp_servers=self.mcp_servers,
+             tool_paths=self.tool_paths,
+             mocked=self.mocked,
+         )
+
+         # Track the generated step file for cleanup
+         # The step file name is based on the work_dir hash
+         import hashlib
+
+         dir_hash = hashlib.md5(str(self.work_dir).encode()).hexdigest()[:8]
+         self.generated_step_file = self.work_dir / "steps" / f"tactus_steps_{dir_hash}.py"
+
+         logger.info(f"Test setup complete for feature: {self.parsed_feature.name}")
+
+     def run_tests(self, parallel: bool = True, scenario_filter: Optional[str] = None) -> TestResult:
+         """
+         Run all scenarios (optionally in parallel).
+
+         Args:
+             parallel: Whether to run scenarios in parallel
+             scenario_filter: Optional scenario name to run (runs only that scenario)
+
+         Returns:
+             TestResult with all scenario results
+         """
+         if not self.parsed_feature or not self.work_dir:
+             raise RuntimeError("Must call setup() before run_tests()")
+
+         # Get scenarios to run
+         scenarios = self.parsed_feature.scenarios
+         if scenario_filter:
+             scenarios = [s for s in scenarios if s.name == scenario_filter]
+             if not scenarios:
+                 raise ValueError(f"Scenario not found: {scenario_filter}")
+
+         # Run scenarios
+         if parallel and len(scenarios) > 1:
+             # Run in parallel using 'spawn' to avoid Behave global state conflicts
+             # 'spawn' creates fresh Python interpreters for each worker
+             ctx = multiprocessing.get_context("spawn")
+             with ctx.Pool(processes=min(len(scenarios), os.cpu_count() or 1)) as pool:
+                 scenario_results = pool.starmap(
+                     self._run_single_scenario, [(s.name, str(self.work_dir)) for s in scenarios]
+                 )
+         else:
+             # Run sequentially
+             scenario_results = [
+                 self._run_single_scenario(s.name, str(self.work_dir)) for s in scenarios
+             ]
+
+         # Build feature result
+         feature_result = self._build_feature_result(scenario_results)
+
+         # Build test result
+         return self._build_test_result([feature_result])
+
+     @staticmethod
+     def _run_single_scenario(scenario_name: str, work_dir: str) -> ScenarioResult:
+         """
+         Run a single scenario in subprocess to avoid event loop conflicts.
+
+         Args:
+             scenario_name: Name of scenario to run
+             work_dir: Path to Behave work directory
+
+         Returns:
+             ScenarioResult
+         """
+         # Create tag filter for this scenario
+         # Remove special characters that could interfere with behave tags
+         import re
+
+         sanitized_name = re.sub(r"[^a-z0-9_]", "_", scenario_name.lower())
+         sanitized_name = re.sub(r"_+", "_", sanitized_name)  # Collapse multiple underscores
+         tag_filter = f"scenario_{sanitized_name}"
+
+         # Use unique results file to avoid conflicts when running in parallel
+         import uuid
+
+         results_filename = f"results_{uuid.uuid4().hex[:8]}.json"
+
+         # Run behave in subprocess to isolate event loops
+         cmd = [
+             sys.executable,
+             "-m",
+             "behave",
+             str(work_dir),
+             "--tags",
+             tag_filter,
+             "--no-capture",
+             "--format",
+             "json",
+             "--outfile",
+             f"{work_dir}/{results_filename}",
+         ]
+
+         logger.debug(f"Running behave subprocess: {' '.join(cmd)}")
+
+         try:
+             # Ensure tactus module is importable in subprocess
+             env = os.environ.copy()
+             # Add parent directory to PYTHONPATH so tactus can be imported
+             project_root = Path(__file__).parent.parent.parent  # Go up to project root
+             if "PYTHONPATH" in env:
+                 env["PYTHONPATH"] = f"{project_root}:{env['PYTHONPATH']}"
+             else:
+                 env["PYTHONPATH"] = str(project_root)
+
+             result = subprocess.run(
+                 cmd,
+                 capture_output=True,
+                 text=True,
+                 timeout=600,  # 10 minute timeout
+                 cwd=work_dir,
+                 env=env,
+             )
+
+             # Check if behave ran successfully (even if tests failed)
+             if result.returncode not in [0, 1]:
+                 # Return code 0 = all passed, 1 = some failed, other = error
+                 raise RuntimeError(
+                     f"Behave subprocess failed with return code {result.returncode}\n"
+                     f"STDOUT: {result.stdout}\n"
+                     f"STDERR: {result.stderr}"
+                 )
+
+             # Parse JSON results
+             import json
+
+             results_file = Path(work_dir) / results_filename
+             if not results_file.exists():
+                 raise RuntimeError(
+                     f"Behave results file not found: {results_file}\n"
+                     f"Command: {' '.join(cmd)}\n"
+                     f"Return code: {result.returncode}\n"
+                     f"STDOUT: {result.stdout}\n"
+                     f"STDERR: {result.stderr}"
+                 )
+
+             # Behave may write multiple JSON objects (one per feature run)
+             # We need to parse them separately and combine
+             with open(results_file) as f:
+                 content = f.read().strip()
+                 if not content:
+                     raise RuntimeError("Behave results file is empty")
+
+             # Try to parse as single JSON first
+             try:
+                 behave_results = json.loads(content)
+             except json.JSONDecodeError:
+                 # Multiple JSON objects - split and parse each
+                 behave_results = []
+                 for line in content.split("\n"):
+                     line = line.strip()
+                     if line:
+                         try:
+                             obj = json.loads(line)
+                             if isinstance(obj, list):
+                                 behave_results.extend(obj)
+                             else:
+                                 behave_results.append(obj)
+                         except json.JSONDecodeError:
+                             continue
+
+             # Extract the scenario result
+             found_scenarios = []
+             for feature_data in behave_results:
+                 for element in feature_data.get("elements", []):
+                     element_name = element.get("name")
+                     found_scenarios.append(element_name)
+                     if element_name == scenario_name:
+                         scenario_result = TactusTestRunner._convert_json_scenario_result(element)
+                         # Clean up results file
+                         try:
+                             results_file.unlink()
+                         except Exception:
+                             pass
+                         return scenario_result
+
+             # Scenario not found (shouldn't happen)
+             raise RuntimeError(
+                 f"Scenario '{scenario_name}' not found in Behave JSON results. "
+                 f"Found scenarios: {found_scenarios}. "
+                 f"Tag filter used: scenario_{scenario_name.lower().replace(' ', '_')}. "
+                 f"Command: {' '.join(cmd)}. "
+                 f"Behave output: {result.stdout[:500]}"
+             )
+
+         except subprocess.TimeoutExpired:
+             raise RuntimeError(f"Scenario '{scenario_name}' timed out after 10 minutes")
+         except Exception as e:
+             logger.error(f"Error running scenario '{scenario_name}': {e}", exc_info=True)
+             raise
+
+     @staticmethod
+     def _convert_json_scenario_result(scenario_data: Dict[str, Any]) -> ScenarioResult:
+         """Convert Behave JSON scenario data to ScenarioResult."""
+         steps = []
+         for step_data in scenario_data.get("steps", []):
+             result = step_data.get("result", {})
+             # error_message might be a list (behave can return traceback as list)
+             # Convert to string if needed
+             error_msg = result.get("error_message")
+             if isinstance(error_msg, list):
+                 error_msg = "\n".join(str(e) for e in error_msg)
+             elif error_msg is not None and not isinstance(error_msg, str):
+                 error_msg = str(error_msg)
+
+             steps.append(
+                 StepResult(
+                     keyword=step_data.get("keyword", ""),
+                     message=step_data.get("name", ""),
+                     status=result.get("status", "skipped"),
+                     duration=result.get("duration", 0.0),
+                     error_message=error_msg,
+                 )
+             )
+
+         # Calculate total duration from steps
+         total_duration = sum(s.duration for s in steps)
+
+         # Extract tags
+         tags = [tag.replace("@", "") for tag in scenario_data.get("tags", [])]
+
+         # Extract execution metrics from scenario properties (if attached by hook)
+         # These would be in the scenario_data dict if the hook sets them
+         total_cost = scenario_data.get("total_cost", 0.0)
+         total_tokens = scenario_data.get("total_tokens", 0)
+         iterations = scenario_data.get("iterations", 0)
+         tools_used = scenario_data.get("tools_used", [])
+         llm_calls = scenario_data.get("llm_calls", 0)
+
+         # Determine overall status
+         status = scenario_data.get("status", "passed")
+
+         return ScenarioResult(
+             name=scenario_data.get("name", ""),
+             status=status,
+             duration=total_duration,
+             steps=steps,
+             tags=tags,
+             timestamp=datetime.now(),
+             total_cost=total_cost,
+             total_tokens=total_tokens,
+             iterations=iterations,
+             tools_used=tools_used,
+             llm_calls=llm_calls,
+         )
+
+     @staticmethod
+     def _convert_scenario_result(behave_scenario) -> ScenarioResult:
+         """Convert Behave scenario object to ScenarioResult (legacy method)."""
+         steps = []
+         for behave_step in behave_scenario.steps:
+             steps.append(
+                 StepResult(
+                     keyword=behave_step.keyword,
+                     message=behave_step.name,
+                     status=behave_step.status.name,
+                     duration=behave_step.duration,
+                     error_message=(
+                         behave_step.error_message if hasattr(behave_step, "error_message") else None
+                     ),
+                 )
+             )
+
+         # Extract execution metrics (attached by after_scenario hook)
+         total_cost = getattr(behave_scenario, "total_cost", 0.0)
+         total_tokens = getattr(behave_scenario, "total_tokens", 0)
+         cost_breakdown = getattr(behave_scenario, "cost_breakdown", [])
+         # iterations is a method, not an attribute - call it if it exists
+         iterations_attr = getattr(behave_scenario, "iterations", None)
+         iterations = iterations_attr() if callable(iterations_attr) else 0
+         tools_used = getattr(behave_scenario, "tools_used", [])
+         llm_calls = len(cost_breakdown)  # Number of LLM calls = number of cost events
+
+         return ScenarioResult(
+             name=behave_scenario.name,
+             status=behave_scenario.status.name,
+             duration=behave_scenario.duration,
+             steps=steps,
+             tags=behave_scenario.tags,
+             timestamp=datetime.now(),
+             total_cost=total_cost,
+             total_tokens=total_tokens,
+             iterations=iterations,
+             tools_used=tools_used,
+             llm_calls=llm_calls,
+         )
+
+     def _build_feature_result(self, scenario_results: List[ScenarioResult]) -> FeatureResult:
+         """Build FeatureResult from scenario results."""
+         if not self.parsed_feature:
+             raise RuntimeError("No parsed feature available")
+
+         # Calculate feature status
+         all_passed = all(s.status == "passed" for s in scenario_results)
+         any_failed = any(s.status == "failed" for s in scenario_results)
+         status = "passed" if all_passed else ("failed" if any_failed else "skipped")
+
+         # Calculate total duration
+         total_duration = sum(s.duration for s in scenario_results)
+
+         return FeatureResult(
+             name=self.parsed_feature.name,
+             description=self.parsed_feature.description,
+             status=status,
+             duration=total_duration,
+             scenarios=scenario_results,
+             tags=self.parsed_feature.tags,
+         )
+
+     def _build_test_result(self, feature_results: List[FeatureResult]) -> TestResult:
+         """Build TestResult from feature results."""
+         total_scenarios = sum(len(f.scenarios) for f in feature_results)
+         passed_scenarios = sum(
+             1 for f in feature_results for s in f.scenarios if s.status == "passed"
+         )
+         failed_scenarios = total_scenarios - passed_scenarios
+         total_duration = sum(f.duration for f in feature_results)
+
+         # Aggregate execution metrics across all scenarios
+         total_cost = sum(s.total_cost for f in feature_results for s in f.scenarios)
+         total_tokens = sum(s.total_tokens for f in feature_results for s in f.scenarios)
+         total_iterations = sum(s.iterations for f in feature_results for s in f.scenarios)
+         total_llm_calls = sum(s.llm_calls for f in feature_results for s in f.scenarios)
+
+         # Collect unique tools used across all scenarios
+         all_tools = set()
+         for f in feature_results:
+             for s in f.scenarios:
+                 all_tools.update(s.tools_used)
+         unique_tools_used = sorted(list(all_tools))
+
+         return TestResult(
+             features=feature_results,
+             total_scenarios=total_scenarios,
+             passed_scenarios=passed_scenarios,
+             failed_scenarios=failed_scenarios,
+             total_duration=total_duration,
+             total_cost=total_cost,
+             total_tokens=total_tokens,
+             total_iterations=total_iterations,
+             total_llm_calls=total_llm_calls,
+             unique_tools_used=unique_tools_used,
+         )
+
+     def cleanup(self) -> None:
+         """
+         Cleanup temporary files and clear Behave state.
+
+         This removes:
+         - The temporary work directory
+         - Generated step modules from sys.modules
+         - Behave's global step registry
+         """
+         import sys
+         import importlib
+
+         # Clear the generated step module from sys.modules
+         if self.generated_step_file:
+             # Compute the module name that Python would use
+             # The step file is in work_dir/steps/tactus_steps_<hash>.py
+             # Python imports it as "steps.tactus_steps_<hash>"
+             step_module_name = f"steps.{self.generated_step_file.stem}"
+
+             # Remove all variations of this module name
+             modules_to_clear = [
+                 m
+                 for m in list(sys.modules.keys())
+                 if step_module_name in m or self.generated_step_file.stem in m
+             ]
+
+             for mod in modules_to_clear:
+                 del sys.modules[mod]
+                 logger.debug(f"Cleared module from sys.modules: {mod}")
+
+         # Clear Behave's global step registry IN-PLACE
+         # IMPORTANT: We call registry.clear() instead of creating a new registry
+         # because the @step decorator has a closure reference to the registry object.
+         try:
+             import behave.step_registry
+
+             behave.step_registry.registry.clear()
+             logger.debug("Cleared Behave step registry")
+         except ImportError:
+             pass
+
+         # Invalidate import caches
+         importlib.invalidate_caches()
+
+         # Clean up work directory
+         if self.work_dir and self.work_dir.exists():
+             import shutil
+
+             try:
+                 shutil.rmtree(self.work_dir)
+                 logger.debug(f"Cleaned up work directory: {self.work_dir}")
+             except Exception as e:
+                 logger.warning(f"Failed to cleanup work directory: {e}")
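A hedged end-to-end sketch of the runner lifecycle (the procedure path, feature text, and step wording below are illustrative, and behave must be installed): setup() parses the Gherkin and generates the Behave working directory, run_tests() fans scenarios out across 'spawn' workers, and cleanup() should run even on failure because it removes the temp directory and clears Behave's global step registry.

from pathlib import Path
from tactus.testing.test_runner import TactusTestRunner

# Illustrative feature text; step wording must match registered patterns.
gherkin = """\
Feature: Greeting procedure
  Scenario: Says hello
    Given the procedure has started
    Then the output contains "hello"
"""

runner = TactusTestRunner(Path("greeting.tac"), mocked=True)  # illustrative path
runner.setup(gherkin)
try:
    result = runner.run_tests(parallel=True)
    print(f"{result.passed_scenarios}/{result.total_scenarios} scenarios passed")
finally:
    runner.cleanup()  # removes temp dir and clears Behave's global registry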
tactus/tracing/__init__.py
@@ -0,0 +1,5 @@
+ """Tracing and debugging support for Tactus procedures."""
+
+ from tactus.tracing.trace_manager import TraceManager
+
+ __all__ = ["TraceManager"]
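Since this __init__ simply re-exports the manager, both import paths below name the same class:

from tactus.tracing import TraceManager
from tactus.tracing.trace_manager import TraceManager as DirectTraceManager

assert TraceManager is DirectTraceManager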