tactus 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. tactus/__init__.py +49 -0
  2. tactus/adapters/__init__.py +9 -0
  3. tactus/adapters/broker_log.py +76 -0
  4. tactus/adapters/cli_hitl.py +189 -0
  5. tactus/adapters/cli_log.py +223 -0
  6. tactus/adapters/cost_collector_log.py +56 -0
  7. tactus/adapters/file_storage.py +367 -0
  8. tactus/adapters/http_callback_log.py +109 -0
  9. tactus/adapters/ide_log.py +71 -0
  10. tactus/adapters/lua_tools.py +336 -0
  11. tactus/adapters/mcp.py +289 -0
  12. tactus/adapters/mcp_manager.py +196 -0
  13. tactus/adapters/memory.py +53 -0
  14. tactus/adapters/plugins.py +419 -0
  15. tactus/backends/http_backend.py +58 -0
  16. tactus/backends/model_backend.py +35 -0
  17. tactus/backends/pytorch_backend.py +110 -0
  18. tactus/broker/__init__.py +12 -0
  19. tactus/broker/client.py +247 -0
  20. tactus/broker/protocol.py +183 -0
  21. tactus/broker/server.py +1123 -0
  22. tactus/broker/stdio.py +12 -0
  23. tactus/cli/__init__.py +7 -0
  24. tactus/cli/app.py +2245 -0
  25. tactus/cli/commands/__init__.py +0 -0
  26. tactus/core/__init__.py +32 -0
  27. tactus/core/config_manager.py +790 -0
  28. tactus/core/dependencies/__init__.py +14 -0
  29. tactus/core/dependencies/registry.py +180 -0
  30. tactus/core/dsl_stubs.py +2117 -0
  31. tactus/core/exceptions.py +66 -0
  32. tactus/core/execution_context.py +480 -0
  33. tactus/core/lua_sandbox.py +508 -0
  34. tactus/core/message_history_manager.py +236 -0
  35. tactus/core/mocking.py +286 -0
  36. tactus/core/output_validator.py +291 -0
  37. tactus/core/registry.py +499 -0
  38. tactus/core/runtime.py +2907 -0
  39. tactus/core/template_resolver.py +142 -0
  40. tactus/core/yaml_parser.py +301 -0
  41. tactus/docker/Dockerfile +61 -0
  42. tactus/docker/entrypoint.sh +69 -0
  43. tactus/dspy/__init__.py +39 -0
  44. tactus/dspy/agent.py +1144 -0
  45. tactus/dspy/broker_lm.py +181 -0
  46. tactus/dspy/config.py +212 -0
  47. tactus/dspy/history.py +196 -0
  48. tactus/dspy/module.py +405 -0
  49. tactus/dspy/prediction.py +318 -0
  50. tactus/dspy/signature.py +185 -0
  51. tactus/formatting/__init__.py +7 -0
  52. tactus/formatting/formatter.py +437 -0
  53. tactus/ide/__init__.py +9 -0
  54. tactus/ide/coding_assistant.py +343 -0
  55. tactus/ide/server.py +2223 -0
  56. tactus/primitives/__init__.py +49 -0
  57. tactus/primitives/control.py +168 -0
  58. tactus/primitives/file.py +229 -0
  59. tactus/primitives/handles.py +378 -0
  60. tactus/primitives/host.py +94 -0
  61. tactus/primitives/human.py +342 -0
  62. tactus/primitives/json.py +189 -0
  63. tactus/primitives/log.py +187 -0
  64. tactus/primitives/message_history.py +157 -0
  65. tactus/primitives/model.py +163 -0
  66. tactus/primitives/procedure.py +564 -0
  67. tactus/primitives/procedure_callable.py +318 -0
  68. tactus/primitives/retry.py +155 -0
  69. tactus/primitives/session.py +152 -0
  70. tactus/primitives/state.py +182 -0
  71. tactus/primitives/step.py +209 -0
  72. tactus/primitives/system.py +93 -0
  73. tactus/primitives/tool.py +375 -0
  74. tactus/primitives/tool_handle.py +279 -0
  75. tactus/primitives/toolset.py +229 -0
  76. tactus/protocols/__init__.py +38 -0
  77. tactus/protocols/chat_recorder.py +81 -0
  78. tactus/protocols/config.py +97 -0
  79. tactus/protocols/cost.py +31 -0
  80. tactus/protocols/hitl.py +71 -0
  81. tactus/protocols/log_handler.py +27 -0
  82. tactus/protocols/models.py +355 -0
  83. tactus/protocols/result.py +33 -0
  84. tactus/protocols/storage.py +90 -0
  85. tactus/providers/__init__.py +13 -0
  86. tactus/providers/base.py +92 -0
  87. tactus/providers/bedrock.py +117 -0
  88. tactus/providers/google.py +105 -0
  89. tactus/providers/openai.py +98 -0
  90. tactus/sandbox/__init__.py +63 -0
  91. tactus/sandbox/config.py +171 -0
  92. tactus/sandbox/container_runner.py +1099 -0
  93. tactus/sandbox/docker_manager.py +433 -0
  94. tactus/sandbox/entrypoint.py +227 -0
  95. tactus/sandbox/protocol.py +213 -0
  96. tactus/stdlib/__init__.py +10 -0
  97. tactus/stdlib/io/__init__.py +13 -0
  98. tactus/stdlib/io/csv.py +88 -0
  99. tactus/stdlib/io/excel.py +136 -0
  100. tactus/stdlib/io/file.py +90 -0
  101. tactus/stdlib/io/fs.py +154 -0
  102. tactus/stdlib/io/hdf5.py +121 -0
  103. tactus/stdlib/io/json.py +109 -0
  104. tactus/stdlib/io/parquet.py +83 -0
  105. tactus/stdlib/io/tsv.py +88 -0
  106. tactus/stdlib/loader.py +274 -0
  107. tactus/stdlib/tac/tactus/tools/done.tac +33 -0
  108. tactus/stdlib/tac/tactus/tools/log.tac +50 -0
  109. tactus/testing/README.md +273 -0
  110. tactus/testing/__init__.py +61 -0
  111. tactus/testing/behave_integration.py +380 -0
  112. tactus/testing/context.py +486 -0
  113. tactus/testing/eval_models.py +114 -0
  114. tactus/testing/evaluation_runner.py +222 -0
  115. tactus/testing/evaluators.py +634 -0
  116. tactus/testing/events.py +94 -0
  117. tactus/testing/gherkin_parser.py +134 -0
  118. tactus/testing/mock_agent.py +315 -0
  119. tactus/testing/mock_dependencies.py +234 -0
  120. tactus/testing/mock_hitl.py +171 -0
  121. tactus/testing/mock_registry.py +168 -0
  122. tactus/testing/mock_tools.py +133 -0
  123. tactus/testing/models.py +115 -0
  124. tactus/testing/pydantic_eval_runner.py +508 -0
  125. tactus/testing/steps/__init__.py +13 -0
  126. tactus/testing/steps/builtin.py +902 -0
  127. tactus/testing/steps/custom.py +69 -0
  128. tactus/testing/steps/registry.py +68 -0
  129. tactus/testing/test_runner.py +489 -0
  130. tactus/tracing/__init__.py +5 -0
  131. tactus/tracing/trace_manager.py +417 -0
  132. tactus/utils/__init__.py +1 -0
  133. tactus/utils/cost_calculator.py +72 -0
  134. tactus/utils/model_pricing.py +132 -0
  135. tactus/utils/safe_file_library.py +502 -0
  136. tactus/utils/safe_libraries.py +234 -0
  137. tactus/validation/LuaLexerBase.py +66 -0
  138. tactus/validation/LuaParserBase.py +23 -0
  139. tactus/validation/README.md +224 -0
  140. tactus/validation/__init__.py +7 -0
  141. tactus/validation/error_listener.py +21 -0
  142. tactus/validation/generated/LuaLexer.interp +231 -0
  143. tactus/validation/generated/LuaLexer.py +5548 -0
  144. tactus/validation/generated/LuaLexer.tokens +124 -0
  145. tactus/validation/generated/LuaLexerBase.py +66 -0
  146. tactus/validation/generated/LuaParser.interp +173 -0
  147. tactus/validation/generated/LuaParser.py +6439 -0
  148. tactus/validation/generated/LuaParser.tokens +124 -0
  149. tactus/validation/generated/LuaParserBase.py +23 -0
  150. tactus/validation/generated/LuaParserVisitor.py +118 -0
  151. tactus/validation/generated/__init__.py +7 -0
  152. tactus/validation/grammar/LuaLexer.g4 +123 -0
  153. tactus/validation/grammar/LuaParser.g4 +178 -0
  154. tactus/validation/semantic_visitor.py +817 -0
  155. tactus/validation/validator.py +157 -0
  156. tactus-0.31.0.dist-info/METADATA +1809 -0
  157. tactus-0.31.0.dist-info/RECORD +160 -0
  158. tactus-0.31.0.dist-info/WHEEL +4 -0
  159. tactus-0.31.0.dist-info/entry_points.txt +2 -0
  160. tactus-0.31.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,902 @@
1
+ """
2
+ Built-in step definitions for Tactus primitives.
3
+
4
+ Provides a comprehensive library of steps for testing:
5
+ - Tool calls
6
+ - State management
7
+ - Procedure completion
8
+ - Iterations and timing
9
+ - Parameters and context
10
+ - Regex pattern matching
11
+ - Fuzzy string matching
12
+ """
13
+
14
+ import logging
15
+ import re
16
+ import ast
17
+ from typing import Any
18
+
19
+ from .registry import StepRegistry
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ def _parse_step_string_literal(value: str) -> tuple[str, bool]:
26
+ """
27
+ Parse an optional quoted string literal from a step capture group.
28
+
29
+ Supports single-quoted or double-quoted Python-style escapes, e.g.:
30
+ "Hello! I'm World"
31
+ 'He said: "hi"'
32
+ "Line 1\\nLine 2"
33
+
34
+ Returns:
35
+ (parsed_value, was_quoted)
36
+ """
37
+ stripped = value.strip()
38
+ if len(stripped) >= 2 and stripped[0] in {"'", '"'} and stripped[-1] == stripped[0]:
39
+ try:
40
+ parsed = ast.literal_eval(stripped)
41
+ if isinstance(parsed, str):
42
+ return parsed, True
43
+ except Exception:
44
+ # Fall back to raw string if the literal is malformed.
45
+ return stripped, True
46
+ return value, False
47
+
48
+
49
def register_builtin_steps(registry: StepRegistry) -> None:
    """
    Install every built-in step group on the given registry.

    The groups are registered in a fixed order: tool, state, output,
    completion, iteration/timing, parameter/context, agent, regex,
    model, and finally fuzzy-matching steps.

    Args:
        registry: StepRegistry to register steps with
    """
    group_registrars = (
        register_tool_steps,
        register_state_steps,
        register_output_steps,
        register_completion_steps,
        register_iteration_steps,
        register_parameter_steps,
        register_agent_steps,
        register_regex_steps,
        register_model_steps,
        register_fuzzy_steps,
    )
    for register_group in group_registrars:
        register_group(registry)
85
+
86
+
87
+ # Tool-related steps
88
+
89
+
90
def register_tool_steps(registry: StepRegistry) -> None:
    """Attach the tool-call assertion and tool-mocking patterns to the registry."""
    tool_pattern = r"the (?P<tool>[-\w]+) tool should"
    bindings = (
        (r"the (?P<tool>[-\w]+) tool should be called", step_tool_called),
        (r"the (?P<tool>[-\w]+) tool should not be called", step_tool_not_called),
        # Singular and plural spellings share one handler.
        (
            r"the (?P<tool>[-\w]+) tool should be called at least (?P<n>\d+) time",
            step_tool_called_at_least,
        ),
        (
            r"the (?P<tool>[-\w]+) tool should be called at least (?P<n>\d+) times",
            step_tool_called_at_least,
        ),
        (
            r"the (?P<tool>[-\w]+) tool should be called exactly (?P<n>\d+) time",
            step_tool_called_exactly,
        ),
        (
            r"the (?P<tool>[-\w]+) tool should be called exactly (?P<n>\d+) times",
            step_tool_called_exactly,
        ),
        (
            r"the (?P<tool>[-\w]+) tool should be called with (?P<param>\w+)=(?P<value>.+)",
            step_tool_called_with_param,
        ),
        (
            r'the tool "(?P<tool>[-\w]+)" returns (?P<value>.+)',
            step_mock_tool_returns,
        ),
    )
    del tool_pattern  # shared prefix kept only as a reading aid above
    for pattern, handler in bindings:
        registry.register(pattern, handler)
126
+
127
+
128
def step_tool_called(context: Any, tool: str) -> None:
    """Assert that ``context.tool_called(tool)`` reports the tool as called."""
    was_called = context.tool_called(tool)
    assert was_called, f"Tool '{tool}' was not called"
131
+
132
+
133
def step_tool_not_called(context: Any, tool: str) -> None:
    """Assert that ``context.tool_called(tool)`` reports the tool as not called."""
    was_called = context.tool_called(tool)
    assert not was_called, f"Tool '{tool}' was called but shouldn't be"
136
+
137
+
138
def step_tool_called_at_least(context: Any, tool: str, n: str) -> None:
    """Assert that the tool's recorded call count is no lower than ``n``.

    ``n`` arrives as the text captured from the step and is converted with
    ``int``.
    """
    observed = context.tool_call_count(tool)
    floor = int(n)
    assert floor <= observed, f"Tool '{tool}' called {observed} times, expected at least {floor}"
143
+
144
+
145
def step_tool_called_exactly(context: Any, tool: str, n: str) -> None:
    """Assert that the tool's recorded call count equals ``n``.

    ``n`` arrives as the text captured from the step and is converted with
    ``int``.
    """
    observed = context.tool_call_count(tool)
    wanted = int(n)
    counts_agree = observed == wanted
    assert counts_agree, f"Tool '{tool}' called {observed} times, expected exactly {wanted}"
150
+
151
+
152
def step_tool_called_with_param(context: Any, tool: str, param: str, value: str) -> None:
    """Assert that at least one recorded call of ``tool`` passed ``param=value``.

    ``value`` is the raw text captured from the step. A recorded argument
    matches when any of the following holds:

    - it equals the raw captured text (the original behaviour);
    - the capture is a quoted string literal and the argument equals the
      unquoted text (so ``name="World"`` matches the argument ``World``);
    - the capture is unquoted and the argument is a bool or number whose
      value corresponds to the text (``flag=true``, ``count=3``).

    Raises:
        AssertionError: if the tool was never called or no call matches.
    """
    calls = context.tool_calls(tool)
    assert calls, f"Tool '{tool}' was not called"

    expected, was_quoted = _parse_step_string_literal(value)
    raw = value.strip()

    def _matches(actual: Any) -> bool:
        if actual == value or actual == expected:
            return True
        # A quoted expectation is a string; only string equality applies.
        if was_quoted or actual is None or isinstance(actual, str):
            return False
        if isinstance(actual, bool):
            return str(actual).lower() == raw.lower()
        if isinstance(actual, (int, float)):
            try:
                return float(raw) == actual
            except ValueError:
                return False
        return str(actual) == raw

    # ``or {}`` tolerates calls recorded with a missing or None args mapping.
    found = any(_matches((call.get("args") or {}).get(param)) for call in calls)
    assert found, f"Tool '{tool}' was not called with {param}={value}"
160
+
161
+
162
def step_mock_tool_returns(context: Any, tool: str, value: str) -> None:
    """Register a mocked return value for ``tool`` on the scenario context.

    A quoted capture is used as the decoded string. An unquoted capture is
    evaluated as a Python literal when possible and otherwise passed through
    as plain text (e.g., positive/neutral).

    Raises:
        AssertionError: if the context has no ``mock_tool_returns`` attribute.
    """
    literal, quoted = _parse_step_string_literal(value)
    if quoted:
        mocked_result = literal
    else:
        try:
            mocked_result = ast.literal_eval(literal)
        except Exception:
            # Not a Python literal: keep the raw text.
            mocked_result = literal

    supports_mocking = hasattr(context, "mock_tool_returns")
    if not supports_mocking:
        raise AssertionError("Context does not support tool mocking")

    context.mock_tool_returns(tool, mocked_result)
176
+
177
+
178
def step_procedure_started(context: Any) -> None:
    """Setup step: confirm that a test context object is present.

    Nothing is executed here; the procedure itself is run by the
    "When" steps.
    """
    context_ready = context is not None
    assert context_ready, "Test context not initialized"
183
+
184
+
185
+ # State-related steps
186
+
187
+
188
def register_state_steps(registry: StepRegistry) -> None:
    """Attach the state assertion patterns to the registry."""
    bindings = (
        (r"the state (?P<key>\w+) should be (?P<value>.+)", step_state_equals),
        (r"the state (?P<key>\w+) should exist", step_state_exists),
        (r"the state should contain (?P<key>\w+)", step_state_contains),
    )
    for pattern, handler in bindings:
        registry.register(pattern, handler)
196
+
197
+
198
def step_state_equals(context: Any, key: str, value: str) -> None:
    """Assert that the state entry ``key`` equals the expected step value.

    The comparison always accepts an exact match between ``str(actual)``
    and the expected text (quotes removed when the capture was quoted).
    For *unquoted* expectations two typed fallbacks are accepted as well,
    mirroring the output-equality steps:

    - ``true`` / ``false`` match case-insensitively (so ``true`` matches a
      stored ``True``);
    - a numeric expectation matches a stored int/float of the same value
      (so ``5`` matches ``5.0``). Bools are not treated as numbers here.

    Raises:
        AssertionError: if none of the comparisons succeed.
    """
    actual = context.state_get(key)
    value, was_quoted = _parse_step_string_literal(value)
    actual_str = str(actual) if actual is not None else "None"

    if actual_str == value:
        return

    if not was_quoted:
        lowered = value.strip().lower()
        if lowered in ("true", "false"):
            if actual_str.lower() == lowered:
                return
        elif isinstance(actual, (int, float)) and not isinstance(actual, bool):
            try:
                expected_num = float(value)
            except ValueError:
                expected_num = None  # expectation is not numeric text
            if expected_num is not None and actual == expected_num:
                return

    raise AssertionError(f"State '{key}' is '{actual_str}', expected '{value}'")
208
+
209
+
210
def step_state_exists(context: Any, key: str) -> None:
    """Assert that ``context.state_exists(key)`` is truthy."""
    key_present = context.state_exists(key)
    assert key_present, f"State key '{key}' does not exist"
214
+
215
+
216
def step_state_contains(context: Any, key: str) -> None:
    """Assert that the state holds ``key`` (same check as the "exists" step)."""
    key_present = context.state_exists(key)
    assert key_present, f"State does not contain key '{key}'"
220
+
221
+
222
+ # Output-related steps
223
+
224
+
225
def register_output_steps(registry: StepRegistry) -> None:
    """Attach the output assertion patterns to the registry.

    Patterns for the scalar output come first, followed by the patterns
    that address a named output key. Registration order is preserved.
    """
    bindings = (
        (r"the output should exist", step_output_value_exists),
        (r"the output should be (?P<value>.+)", step_output_value_equals),
        (
            r"the output should fuzzy match (?P<value>.+) with threshold (?P<threshold>[0-9]*\.?[0-9]+)",
            step_output_value_fuzzy_match,
        ),
        (r"the output should fuzzy match (?P<value>.+)", step_output_value_fuzzy_match),
        (r"the output (?P<key>\w+) should be (?P<value>.+)", step_output_equals),
        (r"the output (?P<key>\w+) should not be (?P<value>.+)", step_output_not_equals),
        (r"the output (?P<key>\w+) should exist", step_output_exists),
        (r"the output should contain (?P<key>\w+)", step_output_contains),
    )
    for pattern, handler in bindings:
        registry.register(pattern, handler)
245
+
246
+
247
def step_output_equals(context: Any, key: str, value: str) -> None:
    """Assert that the output entry ``key`` equals the expected step value.

    Comparison rules, in order:
    - quoted expectation: compare against ``str(actual)``;
    - ``true`` / ``false`` (any case): compare as bool when the output is a
      bool, otherwise compare lowercased text;
    - numeric expectation: compare as numbers, converting the output with
      ``float`` when it is not already an int/float;
    - anything else, or a failed numeric conversion: compare as text.
    """
    actual = context.output_get(key)
    value, quoted = _parse_step_string_literal(value)

    if quoted:
        actual_str = "None" if actual is None else str(actual)
        assert actual_str == value, f"Output '{key}' is '{actual_str}', expected '{value}'"
        return

    lowered = value.lower()
    if lowered in ("true", "false"):
        if isinstance(actual, bool):
            expected_bool = lowered == "true"
            assert actual == expected_bool, f"Output '{key}' is {actual}, expected {expected_bool}"
        else:
            actual_lowered = str(actual).lower()
            assert actual_lowered == lowered, f"Output '{key}' is '{actual}', expected '{value}'"
        return

    try:
        expected_num = float(value)
        actual_num = actual if isinstance(actual, (int, float)) else float(actual)
    except (ValueError, TypeError):
        # One side is not numeric: compare the text forms instead.
        actual_str = "None" if actual is None else str(actual)
        assert actual_str == value, f"Output '{key}' is '{actual_str}', expected '{value}'"
        return

    assert actual_num == expected_num, f"Output '{key}' is {actual_num}, expected {expected_num}"
281
+
282
+
283
def step_output_value_exists(context: Any) -> None:
    """Assert that ``context.output_value()`` is not None."""
    scalar_output = context.output_value()
    has_output = scalar_output is not None
    assert has_output, "Output is missing"
287
+
288
+
289
def step_output_value_equals(context: Any, value: str) -> None:
    """Assert that the scalar output equals the expected step value.

    Comparison rules, in order:
    - quoted expectation: compare against ``str(actual)``;
    - ``true`` / ``false`` (any case): compare as bool when the output is a
      bool, otherwise compare lowercased text;
    - numeric expectation: compare as numbers, converting the output with
      ``float`` when it is not already an int/float;
    - anything else, or a failed numeric conversion: compare as text.
    """
    actual = context.output_value()
    value, quoted = _parse_step_string_literal(value)

    def _as_text() -> str:
        return "None" if actual is None else str(actual)

    if quoted:
        actual_str = _as_text()
        assert actual_str == value, f"Output is '{actual_str}', expected '{value}'"
        return

    lowered = value.lower()
    if lowered in ("true", "false"):
        if isinstance(actual, bool):
            expected_bool = lowered == "true"
            assert actual == expected_bool, f"Output is {actual}, expected {expected_bool}"
        else:
            actual_lowered = _as_text().lower()
            assert actual_lowered == lowered, f"Output is '{actual}', expected '{value}'"
        return

    try:
        expected_num = float(value)
        already_numeric = isinstance(actual, (int, float))
        actual_num = actual if already_numeric else float(actual)
    except (ValueError, TypeError):
        # One side is not numeric: fall through to the text comparison.
        pass
    else:
        assert actual_num == expected_num, (
            f"Output is {actual}, expected {expected_num}"
            if already_numeric
            else f"Output is '{actual}', expected {expected_num}"
        )
        return

    actual_str = _as_text()
    assert actual_str == value, f"Output is '{actual_str}', expected '{value}'"
322
+
323
+
324
def step_output_value_fuzzy_match(context: Any, value: str, threshold: str = "0.8") -> None:
    """Check if scalar output is similar to expected value above a threshold.

    This is a deterministic, non-LLM fuzzy match based on string similarity
    (``difflib.SequenceMatcher``).

    Default behavior:
    - Case-insensitive (compares lowercased text)
    - Punctuation-insensitive (strips punctuation, collapses whitespace)
    - If one non-empty normalized text contains the other, the similarity
      is treated as 1.0

    Multi-match syntax (best-effort):
        Then the output should fuzzy match any of ["Hello", "Hi", "Hey"] with threshold 0.9

    The best similarity across all expected values is compared with the
    threshold.

    Args:
        context: Test context exposing ``output_value()``.
        value: Expected text captured from the step (optionally quoted), or
            an ``any of ...`` list.
        threshold: Minimum similarity, as text convertible with ``float``.

    Raises:
        AssertionError: if the output is missing, the threshold is not a
            number, no expected values are given, or the best similarity is
            below the threshold.
    """
    import difflib

    def _normalize_text(text: str) -> str:
        # Lowercase + strip punctuation + collapse whitespace.
        without_punctuation = re.sub(r"[^\w\s]", "", text.lower())
        return re.sub(r"\s+", " ", without_punctuation).strip()

    actual = context.output_value()
    assert actual is not None, "Output is missing"

    try:
        threshold_f = float(threshold)
    except ValueError:
        raise AssertionError(f"Invalid threshold: {threshold}") from None

    expected_raw, was_quoted = _parse_step_string_literal(value)
    if not was_quoted:
        expected_raw = expected_raw.strip()

    expected_values: list[str]
    any_of_prefix = "any of "

    if expected_raw.lower().startswith(any_of_prefix):
        values_str = expected_raw[len(any_of_prefix):].strip()
        try:
            parsed = ast.literal_eval(values_str)
        except Exception:
            # Not a Python list/tuple literal: use the comma-split fallback.
            parsed = None

        if isinstance(parsed, (list, tuple)):
            expected_values = [item if isinstance(item, str) else str(item) for item in parsed]
        else:
            expected_values = [
                _parse_step_string_literal(part.strip())[0]
                for part in values_str.split(",")
                if part.strip()
            ]

        if not expected_values:
            raise AssertionError(f"No expected values provided: {value}")
    else:
        expected_values = [expected_raw]

    actual_norm = _normalize_text(str(actual))
    best_ratio = -1.0
    best_expected = None

    for expected in expected_values:
        expected_norm = _normalize_text(expected)
        # Both sides must be non-empty for the containment shortcut: the empty
        # string is a substring of everything, so an empty or punctuation-only
        # output would otherwise score 1.0 against any expectation.
        if (
            expected_norm
            and actual_norm
            and (expected_norm in actual_norm or actual_norm in expected_norm)
        ):
            ratio = 1.0
        else:
            ratio = difflib.SequenceMatcher(None, actual_norm, expected_norm).ratio()

        if ratio > best_ratio:
            best_ratio = ratio
            best_expected = expected

    assert best_ratio >= threshold_f, (
        f"Output similarity is {best_ratio:.3f} (threshold {threshold_f:.3f}). "
        f"Output is '{actual}', best match was '{best_expected}'. "
        f"Expected: {expected_values}"
    )
399
+
400
+
401
def step_output_not_equals(context: Any, key: str, value: str) -> None:
    """Assert that the output entry ``key`` differs from the given step value.

    Comparison rules, in order:
    - quoted value: compare against ``str(actual)``;
    - ``true`` / ``false`` (any case): compare as bool when the output is a
      bool, otherwise compare lowercased text;
    - numeric value: compare as numbers, converting the output with
      ``float`` when it is not already an int/float;
    - anything else, or a failed numeric conversion: compare as text.
    """
    actual = context.output_get(key)
    value, quoted = _parse_step_string_literal(value)

    if quoted:
        actual_str = "None" if actual is None else str(actual)
        assert actual_str != value, f"Output '{key}' is '{actual_str}', should not be '{value}'"
        return

    lowered = value.lower()
    if lowered in ("true", "false"):
        if isinstance(actual, bool):
            expected_bool = lowered == "true"
            assert (
                actual != expected_bool
            ), f"Output '{key}' is {actual}, should not be {expected_bool}"
        else:
            actual_lowered = str(actual).lower()
            assert (
                actual_lowered != lowered
            ), f"Output '{key}' is '{actual}', should not be '{value}'"
        return

    try:
        expected_num = float(value)
        actual_num = actual if isinstance(actual, (int, float)) else float(actual)
    except (ValueError, TypeError):
        # One side is not numeric: compare the text forms instead.
        actual_str = "None" if actual is None else str(actual)
        assert actual_str != value, f"Output '{key}' is '{actual_str}', should not be '{value}'"
        return

    assert (
        actual_num != expected_num
    ), f"Output '{key}' is {actual_num}, should not be {expected_num}"
439
+
440
+
441
def step_output_exists(context: Any, key: str) -> None:
    """Assert that ``context.output_exists(key)`` is truthy."""
    key_present = context.output_exists(key)
    assert key_present, f"Output key '{key}' does not exist"
445
+
446
+
447
def step_output_contains(context: Any, key: str) -> None:
    """Assert that the output holds ``key`` (same check as the "exists" step)."""
    key_present = context.output_exists(key)
    assert key_present, f"Output does not contain key '{key}'"
451
+
452
+
453
+ # Completion steps
454
+
455
+
456
def register_completion_steps(registry: StepRegistry) -> None:
    """Attach the procedure-completion and stop-reason patterns to the registry."""
    bindings = (
        (r"the procedure has started", step_procedure_started),
        (r"the procedure should complete successfully", step_procedure_completes),
        (r"the procedure should fail", step_procedure_fails),
        (r"the stop reason should be (?P<reason>.+)", step_stop_reason_equals),
        (r"the stop reason should contain (?P<text>.+)", step_stop_reason_contains),
    )
    for pattern, handler in bindings:
        registry.register(pattern, handler)
467
+
468
+
469
def step_procedure_completes(context: Any) -> None:
    """Assert that ``context.stop_success()`` is truthy."""
    succeeded = context.stop_success()
    assert succeeded, "Procedure did not complete successfully"
472
+
473
+
474
def step_procedure_fails(context: Any) -> None:
    """Assert that ``context.stop_success()`` is falsy."""
    succeeded = context.stop_success()
    assert not succeeded, "Procedure completed successfully but should have failed"
477
+
478
+
479
def step_stop_reason_equals(context: Any, reason: str) -> None:
    """Assert that ``context.stop_reason()`` equals the expected reason.

    ``reason`` is the raw text captured from the step. The check passes when
    the stop reason equals that raw text, or — if the capture is a quoted
    string literal — when it equals the unquoted text. This keeps
    ``the stop reason should be "done"`` consistent with the quoted forms
    accepted by the state and output steps.

    Raises:
        AssertionError: if neither comparison succeeds.
    """
    actual = context.stop_reason()
    unquoted, was_quoted = _parse_step_string_literal(reason)
    matches = actual == reason or (was_quoted and actual == unquoted)
    assert matches, f"Stop reason is '{actual}', expected '{reason}'"
483
+
484
+
485
def step_stop_reason_contains(context: Any, text: str) -> None:
    """Assert that ``context.stop_reason()`` contains the expected text.

    ``text`` is the raw text captured from the step. The check passes when
    the stop reason contains that raw text, or — if the capture is a quoted
    string literal — when it contains the unquoted text.

    Raises:
        AssertionError: if the stop reason is None (reported explicitly
            rather than surfacing as a TypeError from the ``in`` operator)
            or does not contain the text.
    """
    reason = context.stop_reason()
    if reason is None:
        raise AssertionError(f"Stop reason is not set, expected it to contain '{text}'")

    unquoted, was_quoted = _parse_step_string_literal(text)
    contained = text in reason or (was_quoted and unquoted in reason)
    assert contained, f"Stop reason '{reason}' does not contain '{text}'"
489
+
490
+
491
+ # Iteration/timing steps
492
+
493
+
494
def register_iteration_steps(registry: StepRegistry) -> None:
    """Attach the iteration-count and agent-turn patterns to the registry."""
    bindings = (
        (r"the total iterations should be less than (?P<n>\d+)", step_iterations_less_than),
        (
            r"the total iterations should be between (?P<min>\d+) and (?P<max>\d+)",
            step_iterations_between,
        ),
        # Singular and plural spellings share one handler.
        (r"the agent should take at least (?P<n>\d+) turn", step_agent_turns_at_least),
        (r"the agent should take at least (?P<n>\d+) turns", step_agent_turns_at_least),
    )
    for pattern, handler in bindings:
        registry.register(pattern, handler)
509
+
510
+
511
def step_iterations_less_than(context: Any, n: str) -> None:
    """Assert that ``context.iterations`` is strictly below ``n``.

    ``n`` arrives as the text captured from the step and is converted with
    ``int``.
    """
    observed = context.iterations
    ceiling = int(n)
    under_limit = observed < ceiling
    assert under_limit, f"Total iterations is {observed}, expected less than {ceiling}"
518
+
519
+
520
def step_iterations_between(context: Any, min: str, max: str) -> None:
    """Assert that ``context.iterations`` lies in the inclusive range [min, max].

    The parameter names mirror the regex group names of the step pattern;
    both bounds arrive as text and are converted with ``int``.
    """
    observed = context.iterations
    lower, upper = int(min), int(max)
    in_range = lower <= observed <= upper
    assert in_range, f"Total iterations is {observed}, expected between {lower} and {upper}"
528
+
529
+
530
def step_agent_turns_at_least(context: Any, n: str) -> None:
    """Assert that ``context.agent_turns()`` is no lower than ``n``.

    ``n`` arrives as the text captured from the step and is converted with
    ``int``.
    """
    observed = context.agent_turns()
    floor = int(n)
    assert floor <= observed, f"Agent took {observed} turns, expected at least {floor}"
535
+
536
+
537
+ # Parameter/context steps
538
+
539
+
540
def register_parameter_steps(registry: StepRegistry) -> None:
    """Attach the parameter, agent-context and input-setting patterns to the registry."""
    checks = (
        (r"the (?P<param>\w+) parameter is (?P<value>.+)", step_parameter_equals),
        (r"the agent'?s? context should include (?P<text>.+)", step_agent_context_includes),
    )
    # Input-setting steps (Given clauses to set procedure inputs)
    input_setters = (
        (r'the input (?P<key>\w+) is "(?P<value>.+)"', step_input_set_string),
        (r"the input (?P<key>\w+) is \[(?P<values>.+)\]", step_input_set_array),
        (r"the input (?P<key>\w+) is (?P<value>-?\d+\.?\d*)", step_input_set_number),
    )
    for pattern, handler in checks + input_setters:
        registry.register(pattern, handler)
555
+
556
+
557
def step_parameter_equals(context: Any, param: str, value: str) -> None:
    """Assert that the text form of parameter ``param`` equals ``value``.

    The parameter is looked up in ``context.get_params()``; a missing or
    None parameter is compared as the text ``None``.
    """
    actual = context.get_params().get(param)
    actual_str = "None" if actual is None else str(actual)
    assert actual_str == value, f"Parameter '{param}' is '{actual_str}', expected '{value}'"
563
+
564
+
565
def step_agent_context_includes(context: Any, text: str) -> None:
    """Assert that ``text`` is contained in ``context.agent_context()``."""
    agent_context = context.agent_context()
    is_included = text in agent_context
    assert is_included, f"Agent context does not include '{text}'"
569
+
570
+
571
def step_input_set_string(context: Any, key: str, value: str) -> None:
    """Store ``value`` unchanged as the procedure input ``key``.

    The text is handed to ``context.set_input`` exactly as captured.
    """
    context.set_input(key, value)
574
+
575
+
576
def step_input_set_number(context: Any, key: str, value: str) -> None:
    """Store a numeric procedure input parsed from the captured text.

    Text containing a decimal point is stored as a float; anything else is
    stored as an int.
    """
    converter = float if "." in value else int
    context.set_input(key, converter(value))
583
+
584
+
585
def step_input_set_array(context: Any, key: str, values: str) -> None:
    """Set an array input parameter from the text captured between ``[`` and ``]``.

    The text is first evaluated as a Python list literal. If that fails
    (for example because it contains bare words), it is split on commas and
    each item is converted individually:

    - an item wrapped in matching quotes becomes the unquoted string;
    - an item containing ``.`` is tried as a float, any other as an int;
    - anything that cannot be converted stays as the stripped text.

    ``context.set_input`` is called exactly once, outside the parsing
    ``try`` block, so an error raised by the context is never mistaken for
    a parse failure.

    Args:
        context: Test context exposing ``set_input(key, value)``.
        key: Name of the input to set.
        values: Comma-separated item text, without the surrounding brackets.
    """

    def _coerce_item(item: str) -> Any:
        # Quoted item: drop the quotes so '"a b"' is stored as 'a b'.
        if len(item) >= 2 and item[0] in ("'", '"') and item[-1] == item[0]:
            try:
                unquoted = ast.literal_eval(item)
            except (ValueError, SyntaxError):
                return item
            return unquoted if isinstance(unquoted, str) else item
        try:
            return float(item) if "." in item else int(item)
        except ValueError:
            return item

    try:
        parsed = ast.literal_eval(f"[{values}]")
    except (ValueError, SyntaxError):
        # Fall back to comma-split for simple values
        parsed = [_coerce_item(part.strip()) for part in values.split(",")]

    context.set_input(key, parsed)
607
+
608
+
609
+ # Agent steps
610
+
611
+
612
def register_agent_steps(registry: StepRegistry) -> None:
    """Attach the agent-turn, agent-mocking and procedure-run patterns to the registry."""
    bindings = (
        # Singular and plural spellings share one handler.
        (r"the (?P<agent>\w+) agent takes turn", step_agent_takes_turn),
        (r"the (?P<agent>\w+) agent takes turns", step_agent_takes_turn),
        (
            r'the agent "(?P<agent>[^"]+)" responds with (?P<message>.+)',
            step_mock_agent_responds_with,
        ),
        (
            r'the agent "(?P<agent>[^"]+)" calls tool "(?P<tool>[^"]+)" with args (?P<args>.+)',
            step_mock_agent_calls_tool_with_args,
        ),
        (
            r'the agent "(?P<agent>[^"]+)" returns data (?P<data>.+)',
            step_mock_agent_returns_data,
        ),
        (r"the message is (?P<message>.+)", step_set_scenario_message),
        (r"the procedure run", step_procedure_runs),
        (r"the procedure runs", step_procedure_runs),
    )
    for pattern, handler in bindings:
        registry.register(pattern, handler)
639
+
640
+
641
def step_agent_takes_turn(context: Any, agent: str) -> None:
    """Run the procedure via ``context.run_procedure()``.

    The ``agent`` name captured from the step is informational only and is
    not passed on; the procedure runs as defined.
    """
    context.run_procedure()
646
+
647
+
648
def step_mock_agent_responds_with(
    context: Any, agent: str, message: str, when_message: str | None = None
) -> None:
    """Register a per-scenario mocked response for ``agent`` (temporal).

    ``message`` and, when given, ``when_message`` are unquoted if they are
    quoted string literals; ``when_message`` stays None when omitted.

    Raises:
        AssertionError: if the context has no ``mock_agent_response`` attribute.
    """
    reply, _ = _parse_step_string_literal(message)
    trigger = None if when_message is None else _parse_step_string_literal(when_message)[0]

    supports_mocking = hasattr(context, "mock_agent_response")
    if not supports_mocking:
        raise AssertionError("Context does not support agent mocking")
    context.mock_agent_response(agent, reply, when_message=trigger)
659
+
660
+
661
def step_set_scenario_message(context: Any, message: str) -> None:
    """Record the scenario's primary message on the context.

    The message is unquoted first when it is a quoted string literal.

    Raises:
        AssertionError: if the context has no ``set_scenario_message`` attribute.
    """
    scenario_message = _parse_step_string_literal(message)[0]
    supports_message = hasattr(context, "set_scenario_message")
    if not supports_message:
        raise AssertionError("Context does not support scenario message")
    context.set_scenario_message(scenario_message)
667
+
668
+
669
def step_mock_agent_calls_tool_with_args(context: Any, agent: str, tool: str, args: str) -> None:
    """Register a per-scenario mocked tool call made by ``agent``.

    ``args`` is unquoted when it is a quoted string literal and must then
    evaluate, as a Python literal, to a dict.

    Raises:
        AssertionError: if the args cannot be evaluated, are not a dict, or
            the context has no ``mock_agent_tool_call`` attribute.
    """
    literal_text = _parse_step_string_literal(args)[0]
    try:
        tool_args = ast.literal_eval(literal_text)
    except Exception:
        raise AssertionError(f"Invalid tool args literal: {args}")

    is_mapping = isinstance(tool_args, dict)
    if not is_mapping:
        raise AssertionError(f"Tool args must be an object/dict, got {type(tool_args).__name__}")

    supports_mocking = hasattr(context, "mock_agent_tool_call")
    if not supports_mocking:
        raise AssertionError("Context does not support agent tool call mocking")

    context.mock_agent_tool_call(agent, tool, tool_args)
684
+
685
+
686
def step_mock_agent_returns_data(context: Any, agent: str, data: str) -> None:
    """Configure structured mock output data for an agent.

    The ``data`` step text is passed through ``_parse_step_string_literal``
    and the resulting string is evaluated with ``ast.literal_eval``; the
    result must be a ``dict``.

    Args:
        context: Test context; must expose ``mock_agent_data``.
        agent: Name of the agent whose output is mocked.
        data: Step-text literal describing the structured data.

    Raises:
        AssertionError: If the literal cannot be evaluated, if it does not
            evaluate to a dict, or if the context lacks ``mock_agent_data``.
    """
    data_str, _ = _parse_step_string_literal(data)
    try:
        parsed = ast.literal_eval(data_str)
    except Exception as exc:
        # Chain the original parse error so the traceback shows why the
        # literal was rejected.
        raise AssertionError(f"Invalid data literal: {data}") from exc

    if not isinstance(parsed, dict):
        raise AssertionError(f"Data must be an object/dict, got {type(parsed).__name__}")

    if not hasattr(context, "mock_agent_data"):
        raise AssertionError("Context does not support agent data mocking")

    context.mock_agent_data(agent, parsed)
701
+
702
+
703
def step_procedure_runs(context: Any) -> None:
    """Run the procedure and fail the step if execution reported an error.

    After ``context.run_procedure()`` returns, the context's
    ``execution_result`` (when present and truthy) is inspected: a result
    whose ``"success"`` entry is falsy fails the step. A missing ``"success"``
    entry counts as success.

    Raises:
        AssertionError: If the execution result reports failure; the message
            includes the result's ``"error"`` entry, or ``"Unknown error"``
            when there is none.
    """
    context.run_procedure()

    # Execution errors (e.g. Lua errors such as undefined variables) are
    # reported through the result mapping rather than raised.
    outcome = getattr(context, "execution_result", None)
    if not outcome:
        return
    if outcome.get("success", True):
        return

    error = outcome.get("error", "Unknown error")
    raise AssertionError(f"Procedure execution failed: {error}")
717
+
718
+
719
+ # Regex pattern matching steps
720
+
721
+
722
def register_regex_steps(registry: StepRegistry) -> None:
    """Register the step definitions that match values against a regex.

    Each (step pattern, handler) pair below is passed to
    ``registry.register`` in the order listed.
    """
    pattern_steps = (
        # Output regex matching
        (
            r'the output (?P<key>\w+) should match pattern "(?P<pattern>.+)"',
            step_output_matches_pattern,
        ),
        # State regex matching
        (
            r'the state (?P<key>\w+) should match pattern "(?P<pattern>.+)"',
            step_state_matches_pattern,
        ),
        # Stop reason regex matching
        (
            r'the stop reason should match pattern "(?P<pattern>.+)"',
            step_stop_reason_matches_pattern,
        ),
        # Tool argument regex matching
        (
            r'the (?P<tool>[-\w]+) tool should be called with (?P<param>\w+) matching pattern "(?P<pattern>.+)"',
            step_tool_arg_matches_pattern,
        ),
    )

    for step_pattern, handler in pattern_steps:
        registry.register(step_pattern, handler)
748
+
749
+
750
def step_output_matches_pattern(context: Any, key: str, pattern: str) -> None:
    """Assert that an output value matches a regular expression.

    The value comes from ``context.output_get(key)``. ``None`` is treated as
    an empty string; anything else is converted with ``str()``. Matching uses
    ``search``, so the pattern may match anywhere in the value unless it is
    anchored.

    Args:
        context: Test context; must provide ``output_get(key)``.
        key: Output key to look up.
        pattern: Regular expression source.

    Raises:
        AssertionError: If the pattern is not a valid regex (the underlying
            ``re.error`` is chained as the cause) or the value does not match.
    """
    actual = context.output_get(key)
    actual_str = str(actual) if actual is not None else ""

    # Only compilation can raise re.error, so keep the try body to that call.
    try:
        regex = re.compile(pattern)
    except re.error as e:
        raise AssertionError(f"Invalid regex pattern '{pattern}': {e}") from e

    assert regex.search(
        actual_str
    ), f"Output '{key}' value '{actual_str}' does not match pattern '{pattern}'"
762
+
763
+
764
def step_state_matches_pattern(context: Any, key: str, pattern: str) -> None:
    """Assert that a state value matches a regular expression.

    The value comes from ``context.state_get(key)``. ``None`` is treated as an
    empty string; anything else is converted with ``str()``. Matching uses
    ``search``, so the pattern may match anywhere in the value unless it is
    anchored.

    Args:
        context: Test context; must provide ``state_get(key)``.
        key: State key to look up.
        pattern: Regular expression source.

    Raises:
        AssertionError: If the pattern is not a valid regex (the underlying
            ``re.error`` is chained as the cause) or the value does not match.
    """
    actual = context.state_get(key)
    actual_str = str(actual) if actual is not None else ""

    # Only compilation can raise re.error, so keep the try body to that call.
    try:
        regex = re.compile(pattern)
    except re.error as e:
        raise AssertionError(f"Invalid regex pattern '{pattern}': {e}") from e

    assert regex.search(
        actual_str
    ), f"State '{key}' value '{actual_str}' does not match pattern '{pattern}'"
776
+
777
+
778
def step_stop_reason_matches_pattern(context: Any, pattern: str) -> None:
    """Assert that the stop reason matches a regular expression.

    The value comes from ``context.stop_reason()``. ``None`` is treated as an
    empty string and anything else is converted with ``str()``, so a missing
    stop reason produces an assertion failure rather than a ``TypeError``
    from the regex engine. Matching uses ``search``.

    Args:
        context: Test context; must provide ``stop_reason()``.
        pattern: Regular expression source.

    Raises:
        AssertionError: If the pattern is not a valid regex (the underlying
            ``re.error`` is chained as the cause) or the stop reason does not
            match.
    """
    actual = context.stop_reason()
    # Same coercion as the output/state pattern steps.
    actual_str = str(actual) if actual is not None else ""

    # Only compilation can raise re.error, so keep the try body to that call.
    try:
        regex = re.compile(pattern)
    except re.error as e:
        raise AssertionError(f"Invalid regex pattern '{pattern}': {e}") from e

    assert regex.search(
        actual_str
    ), f"Stop reason '{actual_str}' does not match pattern '{pattern}'"
787
+
788
+
789
def step_tool_arg_matches_pattern(context: Any, tool: str, param: str, pattern: str) -> None:
    """Assert that some call to a tool passed an argument matching a regex.

    Calls come from ``context.tool_calls(tool)``; each call is read as a
    mapping whose ``"args"`` entry maps parameter names to values. A call
    counts when its ``param`` value is not ``None`` and ``str(value)`` matches
    the pattern via ``search``. Calls with a missing or empty ``"args"`` entry
    are skipped.

    Args:
        context: Test context; must provide ``tool_calls(tool)``.
        tool: Tool name.
        param: Name of the argument to inspect.
        pattern: Regular expression source.

    Raises:
        AssertionError: If the tool was never called, the pattern is not a
            valid regex (the underlying ``re.error`` is chained as the cause),
            or no call has a matching argument.
    """
    calls = context.tool_calls(tool)
    assert calls, f"Tool '{tool}' was not called"

    # Only compilation can raise re.error, so keep the try body to that call.
    try:
        regex = re.compile(pattern)
    except re.error as e:
        raise AssertionError(f"Invalid regex pattern '{pattern}': {e}") from e

    # `or {}` tolerates calls recorded with args=None as well as no args key.
    param_values = ((call.get("args") or {}).get(param) for call in calls)
    found = any(
        value is not None and regex.search(str(value)) is not None
        for value in param_values
    )

    assert found, f"Tool '{tool}' was not called with {param} matching pattern '{pattern}'"
809
+
810
+
811
+ # Fuzzy string matching steps
812
+
813
+
814
def register_fuzzy_steps(registry: StepRegistry) -> None:
    """Register the step definitions that do fuzzy string comparison.

    Each (step pattern, handler) pair below is passed to
    ``registry.register`` in the order listed.
    """
    # NOTE(review): each default-threshold pattern is registered before its
    # "with N% similarity" variant. Whether that ordering matters depends on
    # how StepRegistry matches step text - confirm against the registry.
    fuzzy_steps = (
        # Output fuzzy matching (default threshold)
        (
            r'the output (?P<key>\w+) should be similar to "(?P<text>.+)"',
            step_output_similar_default,
        ),
        # Output fuzzy matching (custom threshold)
        (
            r'the output (?P<key>\w+) should be similar to "(?P<text>.+)" with (?P<threshold>\d+)% similarity',
            step_output_similar_threshold,
        ),
        # State fuzzy matching (default threshold)
        (
            r'the state (?P<key>\w+) should be similar to "(?P<text>.+)"',
            step_state_similar_default,
        ),
        # State fuzzy matching (custom threshold)
        (
            r'the state (?P<key>\w+) should be similar to "(?P<text>.+)" with (?P<threshold>\d+)% similarity',
            step_state_similar_threshold,
        ),
    )

    for step_pattern, handler in fuzzy_steps:
        registry.register(step_pattern, handler)
840
+
841
+
842
def step_output_similar_default(context: Any, key: str, text: str) -> None:
    """Assert an output value is similar to ``text`` at the default 80% threshold.

    Delegates to ``step_output_similar_threshold`` with a threshold of ``"80"``.
    """
    step_output_similar_threshold(context, key, text, threshold="80")
845
+
846
+
847
def step_output_similar_threshold(context: Any, key: str, text: str, threshold: str) -> None:
    """Assert an output value is at least ``threshold`` percent similar to ``text``.

    The value comes from ``context.output_get(key)``; ``None`` is treated as
    an empty string and anything else is converted with ``str()``. Similarity
    is computed with ``rapidfuzz.fuzz.ratio``.

    Args:
        context: Test context; must provide ``output_get(key)``.
        key: Output key to look up.
        text: Expected text.
        threshold: Minimum similarity as a string of digits; converted with
            ``int()``.

    Raises:
        AssertionError: If the similarity is below the threshold.
    """
    from rapidfuzz import fuzz

    value = context.output_get(key)
    actual_str = "" if value is None else str(value)

    minimum = int(threshold)
    score = fuzz.ratio(actual_str, text)
    similar_enough = score >= minimum

    assert similar_enough, (
        f"Output '{key}' similarity is {score}% (expected >= {minimum}%)\n"
        f" Actual: '{actual_str}'\n"
        f" Expected: '{text}'"
    )
862
+
863
+
864
def step_state_similar_default(context: Any, key: str, text: str) -> None:
    """Assert a state value is similar to ``text`` at the default 80% threshold.

    Delegates to ``step_state_similar_threshold`` with a threshold of ``"80"``.
    """
    step_state_similar_threshold(context, key, text, threshold="80")
867
+
868
+
869
def step_state_similar_threshold(context: Any, key: str, text: str, threshold: str) -> None:
    """Assert a state value is at least ``threshold`` percent similar to ``text``.

    The value comes from ``context.state_get(key)``; ``None`` is treated as an
    empty string and anything else is converted with ``str()``. Similarity is
    computed with ``rapidfuzz.fuzz.ratio``.

    Args:
        context: Test context; must provide ``state_get(key)``.
        key: State key to look up.
        text: Expected text.
        threshold: Minimum similarity as a string of digits; converted with
            ``int()``.

    Raises:
        AssertionError: If the similarity is below the threshold.
    """
    from rapidfuzz import fuzz

    value = context.state_get(key)
    actual_str = "" if value is None else str(value)

    minimum = int(threshold)
    score = fuzz.ratio(actual_str, text)
    similar_enough = score >= minimum

    assert similar_enough, (
        f"State '{key}' similarity is {score}% (expected >= {minimum}%)\n"
        f" Actual: '{actual_str}'\n"
        f" Expected: '{text}'"
    )
884
+
885
+
886
+ # Model-related steps
887
+
888
+
889
def register_model_steps(registry: StepRegistry) -> None:
    """Register the model-related step definitions.

    Currently a single step: the "model predicts" step (When clause), handled
    by ``step_model_predicts``.
    """
    prediction_pattern = r"the (?P<model>\w+) model predicts"
    registry.register(prediction_pattern, step_model_predicts)
894
+
895
+
896
def step_model_predicts(context: Any, model: str) -> None:
    """Run the procedure for a "model predicts" step.

    Args:
        context: Test context; must provide ``run_procedure()``.
        model: Model name captured from the step text. It is not used here;
            the prediction is expected to happen inside the procedure.
    """
    # No direct model call: the step only executes the procedure.
    run = context.run_procedure
    run()