tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/lua_tools.py +23 -1
- tactus/adapters/mcp_manager.py +62 -35
- tactus/broker/server.py +314 -0
- tactus/cli/app.py +11 -1
- tactus/core/dsl_stubs.py +138 -41
- tactus/core/output_validator.py +69 -15
- tactus/core/registry.py +13 -25
- tactus/core/runtime.py +208 -69
- tactus/dspy/agent.py +87 -30
- tactus/ide/server.py +0 -10
- tactus/primitives/__init__.py +0 -2
- tactus/primitives/handles.py +8 -3
- tactus/primitives/procedure_callable.py +36 -0
- tactus/protocols/config.py +0 -5
- tactus/protocols/result.py +3 -3
- tactus/stdlib/tac/tactus/tools/done.tac +1 -1
- tactus/stdlib/tac/tactus/tools/log.tac +1 -1
- tactus/testing/README.md +1 -12
- tactus/testing/behave_integration.py +12 -2
- tactus/testing/context.py +156 -46
- tactus/testing/mock_agent.py +43 -8
- tactus/testing/steps/builtin.py +264 -54
- tactus/testing/test_runner.py +6 -0
- tactus/validation/semantic_visitor.py +19 -11
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/METADATA +9 -11
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/RECORD +30 -31
- tactus/primitives/stage.py +0 -202
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/WHEEL +0 -0
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/entry_points.txt +0 -0
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/licenses/LICENSE +0 -0
tactus/testing/steps/builtin.py
CHANGED
|
@@ -3,7 +3,6 @@ Built-in step definitions for Tactus primitives.
|
|
|
3
3
|
|
|
4
4
|
Provides a comprehensive library of steps for testing:
|
|
5
5
|
- Tool calls
|
|
6
|
-
- Stage transitions
|
|
7
6
|
- State management
|
|
8
7
|
- Procedure completion
|
|
9
8
|
- Iterations and timing
|
|
@@ -14,6 +13,7 @@ Provides a comprehensive library of steps for testing:
|
|
|
14
13
|
|
|
15
14
|
import logging
|
|
16
15
|
import re
|
|
16
|
+
import ast
|
|
17
17
|
from typing import Any
|
|
18
18
|
|
|
19
19
|
from .registry import StepRegistry
|
|
@@ -22,6 +22,30 @@ from .registry import StepRegistry
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
def _parse_step_string_literal(value: str) -> tuple[str, bool]:
|
|
26
|
+
"""
|
|
27
|
+
Parse an optional quoted string literal from a step capture group.
|
|
28
|
+
|
|
29
|
+
Supports single-quoted or double-quoted Python-style escapes, e.g.:
|
|
30
|
+
"Hello! I'm World"
|
|
31
|
+
'He said: "hi"'
|
|
32
|
+
"Line 1\\nLine 2"
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
(parsed_value, was_quoted)
|
|
36
|
+
"""
|
|
37
|
+
stripped = value.strip()
|
|
38
|
+
if len(stripped) >= 2 and stripped[0] in {"'", '"'} and stripped[-1] == stripped[0]:
|
|
39
|
+
try:
|
|
40
|
+
parsed = ast.literal_eval(stripped)
|
|
41
|
+
if isinstance(parsed, str):
|
|
42
|
+
return parsed, True
|
|
43
|
+
except Exception:
|
|
44
|
+
# Fall back to raw string if the literal is malformed.
|
|
45
|
+
return stripped, True
|
|
46
|
+
return value, False
|
|
47
|
+
|
|
48
|
+
|
|
25
49
|
def register_builtin_steps(registry: StepRegistry) -> None:
|
|
26
50
|
"""
|
|
27
51
|
Register all built-in step definitions.
|
|
@@ -32,9 +56,6 @@ def register_builtin_steps(registry: StepRegistry) -> None:
|
|
|
32
56
|
# Tool-related steps
|
|
33
57
|
register_tool_steps(registry)
|
|
34
58
|
|
|
35
|
-
# Stage-related steps
|
|
36
|
-
register_stage_steps(registry)
|
|
37
|
-
|
|
38
59
|
# State-related steps
|
|
39
60
|
register_state_steps(registry)
|
|
40
61
|
|
|
@@ -69,34 +90,40 @@ def register_builtin_steps(registry: StepRegistry) -> None:
|
|
|
69
90
|
def register_tool_steps(registry: StepRegistry) -> None:
|
|
70
91
|
"""Register tool-related step definitions."""
|
|
71
92
|
|
|
72
|
-
registry.register(r"the (?P<tool
|
|
93
|
+
registry.register(r"the (?P<tool>[-\w]+) tool should be called", step_tool_called)
|
|
73
94
|
|
|
74
|
-
registry.register(r"the (?P<tool
|
|
95
|
+
registry.register(r"the (?P<tool>[-\w]+) tool should not be called", step_tool_not_called)
|
|
75
96
|
|
|
76
97
|
registry.register(
|
|
77
|
-
r"the (?P<tool
|
|
98
|
+
r"the (?P<tool>[-\w]+) tool should be called at least (?P<n>\d+) time",
|
|
78
99
|
step_tool_called_at_least,
|
|
79
100
|
)
|
|
80
101
|
|
|
81
102
|
registry.register(
|
|
82
|
-
r"the (?P<tool
|
|
103
|
+
r"the (?P<tool>[-\w]+) tool should be called at least (?P<n>\d+) times",
|
|
83
104
|
step_tool_called_at_least,
|
|
84
105
|
)
|
|
85
106
|
|
|
86
107
|
registry.register(
|
|
87
|
-
r"the (?P<tool
|
|
108
|
+
r"the (?P<tool>[-\w]+) tool should be called exactly (?P<n>\d+) time",
|
|
109
|
+
step_tool_called_exactly,
|
|
88
110
|
)
|
|
89
111
|
|
|
90
112
|
registry.register(
|
|
91
|
-
r"the (?P<tool
|
|
113
|
+
r"the (?P<tool>[-\w]+) tool should be called exactly (?P<n>\d+) times",
|
|
92
114
|
step_tool_called_exactly,
|
|
93
115
|
)
|
|
94
116
|
|
|
95
117
|
registry.register(
|
|
96
|
-
r"the (?P<tool
|
|
118
|
+
r"the (?P<tool>[-\w]+) tool should be called with (?P<param>\w+)=(?P<value>.+)",
|
|
97
119
|
step_tool_called_with_param,
|
|
98
120
|
)
|
|
99
121
|
|
|
122
|
+
registry.register(
|
|
123
|
+
r'the tool "(?P<tool>[-\w]+)" returns (?P<value>.+)',
|
|
124
|
+
step_mock_tool_returns,
|
|
125
|
+
)
|
|
126
|
+
|
|
100
127
|
|
|
101
128
|
def step_tool_called(context: Any, tool: str) -> None:
|
|
102
129
|
"""Check if a tool was called."""
|
|
@@ -132,24 +159,20 @@ def step_tool_called_with_param(context: Any, tool: str, param: str, value: str)
|
|
|
132
159
|
assert found, f"Tool '{tool}' was not called with {param}={value}"
|
|
133
160
|
|
|
134
161
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
registry.register(r"the stage should be (?P<stage>\w+)", step_stage_is)
|
|
162
|
+
def step_mock_tool_returns(context: Any, tool: str, value: str) -> None:
    """Register a mocked return value for ``tool`` on the scenario context.

    A quoted ``value`` is used as its decoded string. An unquoted ``value`` is
    evaluated as a Python literal; when that fails the raw text is used.

    Raises:
        AssertionError: if ``context`` has no ``mock_tool_returns`` attribute.
    """
    mock_value, quoted = _parse_step_string_literal(value)
    if not quoted:
        try:
            evaluated = ast.literal_eval(mock_value)
        except Exception:
            # Not a Python literal: keep bare words (e.g., positive/neutral) as plain strings.
            pass
        else:
            mock_value = evaluated

    if hasattr(context, "mock_tool_returns"):
        context.mock_tool_returns(tool, mock_value)
        return
    raise AssertionError("Context does not support tool mocking")
|
|
153
176
|
|
|
154
177
|
|
|
155
178
|
def step_procedure_started(context: Any) -> None:
|
|
@@ -159,32 +182,6 @@ def step_procedure_started(context: Any) -> None:
|
|
|
159
182
|
assert context is not None, "Test context not initialized"
|
|
160
183
|
|
|
161
184
|
|
|
162
|
-
def step_stage_is(context: Any, stage: str) -> None:
|
|
163
|
-
"""Check if current stage matches expected."""
|
|
164
|
-
current = context.current_stage()
|
|
165
|
-
assert current == stage, f"Expected stage '{stage}', but current stage is '{current}'"
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
def step_stage_transition(context: Any, from_stage: str, to_stage: str) -> None:
|
|
169
|
-
"""Check if stage transition occurred."""
|
|
170
|
-
history = context.stage_history()
|
|
171
|
-
|
|
172
|
-
# Build list of transitions
|
|
173
|
-
transitions = [(history[i], history[i + 1]) for i in range(len(history) - 1)]
|
|
174
|
-
|
|
175
|
-
expected_transition = (from_stage, to_stage)
|
|
176
|
-
assert expected_transition in transitions, (
|
|
177
|
-
f"Stage transition from '{from_stage}' to '{to_stage}' did not occur. "
|
|
178
|
-
f"Actual transitions: {transitions}"
|
|
179
|
-
)
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
def step_in_stage(context: Any, stage: str) -> None:
|
|
183
|
-
"""Check if currently in specified stage."""
|
|
184
|
-
current = context.current_stage()
|
|
185
|
-
assert current == stage, f"Not in stage '{stage}', current stage is '{current}'"
|
|
186
|
-
|
|
187
|
-
|
|
188
185
|
# State-related steps
|
|
189
186
|
|
|
190
187
|
|
|
@@ -201,8 +198,12 @@ def register_state_steps(registry: StepRegistry) -> None:
|
|
|
201
198
|
def step_state_equals(context: Any, key: str, value: str) -> None:
    """Assert that the state entry ``key`` renders to the expected text.

    The stored value is fetched with ``context.state_get(key)`` and converted
    with ``str`` (``None`` becomes ``"None"``). ``value`` may be written as a
    quoted string literal, in which case its decoded form is compared.
    """
    stored = context.state_get(key)
    expected, _ = _parse_step_string_literal(value)
    # str(None) is already "None", so a single conversion covers every case.
    stored_text = str(stored)
    # Quoted and unquoted expectations are compared the same way: as text.
    assert stored_text == expected, f"State '{key}' is '{stored_text}', expected '{expected}'"
|
|
207
208
|
|
|
208
209
|
|
|
@@ -224,6 +225,14 @@ def step_state_contains(context: Any, key: str) -> None:
|
|
|
224
225
|
def register_output_steps(registry: StepRegistry) -> None:
|
|
225
226
|
"""Register output-related step definitions."""
|
|
226
227
|
|
|
228
|
+
registry.register(r"the output should exist", step_output_value_exists)
|
|
229
|
+
registry.register(r"the output should be (?P<value>.+)", step_output_value_equals)
|
|
230
|
+
registry.register(
|
|
231
|
+
r"the output should fuzzy match (?P<value>.+) with threshold (?P<threshold>[0-9]*\.?[0-9]+)",
|
|
232
|
+
step_output_value_fuzzy_match,
|
|
233
|
+
)
|
|
234
|
+
registry.register(r"the output should fuzzy match (?P<value>.+)", step_output_value_fuzzy_match)
|
|
235
|
+
|
|
227
236
|
registry.register(r"the output (?P<key>\w+) should be (?P<value>.+)", step_output_equals)
|
|
228
237
|
|
|
229
238
|
registry.register(
|
|
@@ -238,6 +247,11 @@ def register_output_steps(registry: StepRegistry) -> None:
|
|
|
238
247
|
def step_output_equals(context: Any, key: str, value: str) -> None:
|
|
239
248
|
"""Check if output value equals expected."""
|
|
240
249
|
actual = context.output_get(key)
|
|
250
|
+
value, was_quoted = _parse_step_string_literal(value)
|
|
251
|
+
if was_quoted:
|
|
252
|
+
actual_str = str(actual) if actual is not None else "None"
|
|
253
|
+
assert actual_str == value, f"Output '{key}' is '{actual_str}', expected '{value}'"
|
|
254
|
+
return
|
|
241
255
|
|
|
242
256
|
# Handle boolean comparison specially
|
|
243
257
|
if value.lower() in ("true", "false"):
|
|
@@ -266,9 +280,132 @@ def step_output_equals(context: Any, key: str, value: str) -> None:
|
|
|
266
280
|
assert actual_str == value, f"Output '{key}' is '{actual_str}', expected '{value}'"
|
|
267
281
|
|
|
268
282
|
|
|
283
|
+
def step_output_value_exists(context: Any) -> None:
    """Assert that ``context.output_value()`` returns something other than ``None``."""
    assert context.output_value() is not None, "Output is missing"
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def step_output_value_equals(context: Any, value: str) -> None:
    """Assert that the scalar output matches the expected step value.

    Comparison rules, applied in order:
      1. A quoted expectation is compared as exact text against ``str(output)``.
      2. ``true`` / ``false`` (any casing) is compared as a boolean when the
         output is a ``bool``, otherwise as case-insensitive text.
      3. When both sides can be read as numbers they are compared numerically.
      4. Anything else is compared as exact text.
    """
    actual = context.output_value()
    expected, quoted = _parse_step_string_literal(value)

    if quoted:
        actual_text = "None" if actual is None else str(actual)
        assert actual_text == expected, f"Output is '{actual_text}', expected '{expected}'"
        return

    # Boolean expectations get dedicated handling.
    lowered = expected.lower()
    if lowered in ("true", "false"):
        want = lowered == "true"
        if isinstance(actual, bool):
            assert actual == want, f"Output is {actual}, expected {want}"
        else:
            shown = "none" if actual is None else str(actual).lower()
            assert shown == lowered, f"Output is '{actual}', expected '{expected}'"
        return

    # Numeric comparison is attempted before falling back to text.
    try:
        expected_num = float(expected)
        actual_is_number = isinstance(actual, (int, float))
        actual_num = actual if actual_is_number else float(actual)
    except (ValueError, TypeError):
        # One side is not numeric; compare as text below.
        pass
    else:
        if actual_is_number:
            assert actual == expected_num, f"Output is {actual}, expected {expected_num}"
        else:
            assert actual_num == expected_num, f"Output is '{actual}', expected {expected_num}"
        return

    actual_text = "None" if actual is None else str(actual)
    assert actual_text == expected, f"Output is '{actual_text}', expected '{expected}'"
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def step_output_value_fuzzy_match(context: Any, value: str, threshold: str = "0.8") -> None:
    """Check if scalar output is similar to expected value above a threshold.

    This is a deterministic, non-LLM fuzzy match based on string similarity
    (``difflib.SequenceMatcher``).

    Default behavior:
    - Case-insensitive (compares lowercased text)
    - Punctuation-insensitive (strips punctuation)
    - Whitespace runs are collapsed to a single space
    - If one normalized text contains the other, the similarity is 1.0

    Multi-match syntax (best-effort):
        Then the output should fuzzy match any of ["Hello", "Hi", "Hey"] with threshold 0.9

    Args:
        context: Test context exposing ``output_value()``.
        value: Expected text, optionally quoted, or an ``any of ...`` list.
        threshold: Minimum similarity ratio, given as text (default "0.8").

    Raises:
        AssertionError: if the output is missing, the threshold is not a
            number, no expected values are given, or the best similarity is
            below the threshold.
    """
    import difflib

    def _normalize_text(text: str) -> str:
        # Lowercase + strip punctuation + collapse whitespace.
        normalized = re.sub(r"[^\w\s]", "", text.lower())
        normalized = re.sub(r"\s+", " ", normalized).strip()
        return normalized

    actual = context.output_value()
    assert actual is not None, "Output is missing"

    try:
        threshold_f = float(threshold)
    except ValueError:
        raise AssertionError(f"Invalid threshold: {threshold}")

    expected_raw, was_quoted = _parse_step_string_literal(value)
    if not was_quoted:
        expected_raw = expected_raw.strip()

    expected_values: list[str]

    if expected_raw.lower().startswith("any of "):
        values_str = expected_raw[len("any of "):].strip()
        try:
            parsed = ast.literal_eval(values_str)
        except Exception:
            parsed = None

        expected_values = []
        if isinstance(parsed, (list, tuple)):
            for item in parsed:
                expected_values.append(item if isinstance(item, str) else str(item))
        else:
            # Not a list/tuple literal: fall back to a comma-separated list.
            parts = [p.strip() for p in values_str.split(",") if p.strip()]
            for part in parts:
                parsed_part, _ = _parse_step_string_literal(part)
                expected_values.append(parsed_part)

        if not expected_values:
            raise AssertionError(f"No expected values provided: {value}")
    else:
        expected_values = [expected_raw]

    actual_norm = _normalize_text(str(actual))
    best_ratio = -1.0
    best_expected = None

    for expected in expected_values:
        expected_norm = _normalize_text(expected)
        # The containment shortcut needs BOTH sides to be non-empty: the empty
        # string is a substring of everything, so an empty or punctuation-only
        # output would otherwise score 1.0 against any expectation.
        contained = bool(expected_norm and actual_norm) and (
            expected_norm in actual_norm or actual_norm in expected_norm
        )
        if contained:
            ratio = 1.0
        else:
            ratio = difflib.SequenceMatcher(None, actual_norm, expected_norm).ratio()

        if ratio > best_ratio:
            best_ratio = ratio
            best_expected = expected

    assert best_ratio >= threshold_f, (
        f"Output similarity is {best_ratio:.3f} (threshold {threshold_f:.3f}). "
        f"Output is '{actual}', best match was '{best_expected}'. "
        f"Expected: {expected_values}"
    )
|
|
399
|
+
|
|
400
|
+
|
|
269
401
|
def step_output_not_equals(context: Any, key: str, value: str) -> None:
|
|
270
402
|
"""Check if output value does not equal the specified value."""
|
|
271
403
|
actual = context.output_get(key)
|
|
404
|
+
value, was_quoted = _parse_step_string_literal(value)
|
|
405
|
+
if was_quoted:
|
|
406
|
+
actual_str = str(actual) if actual is not None else "None"
|
|
407
|
+
assert actual_str != value, f"Output '{key}' is '{actual_str}', should not be '{value}'"
|
|
408
|
+
return
|
|
272
409
|
|
|
273
410
|
# Handle boolean comparison specially
|
|
274
411
|
if value.lower() in ("true", "false"):
|
|
@@ -319,6 +456,7 @@ def step_output_contains(context: Any, key: str) -> None:
|
|
|
319
456
|
def register_completion_steps(registry: StepRegistry) -> None:
|
|
320
457
|
"""Register completion-related step definitions."""
|
|
321
458
|
|
|
459
|
+
registry.register(r"the procedure has started", step_procedure_started)
|
|
322
460
|
registry.register(r"the procedure should complete successfully", step_procedure_completes)
|
|
323
461
|
|
|
324
462
|
registry.register(r"the procedure should fail", step_procedure_fails)
|
|
@@ -478,6 +616,23 @@ def register_agent_steps(registry: StepRegistry) -> None:
|
|
|
478
616
|
|
|
479
617
|
registry.register(r"the (?P<agent>\w+) agent takes turns", step_agent_takes_turn)
|
|
480
618
|
|
|
619
|
+
registry.register(
|
|
620
|
+
r'the agent "(?P<agent>[^"]+)" responds with (?P<message>.+)',
|
|
621
|
+
step_mock_agent_responds_with,
|
|
622
|
+
)
|
|
623
|
+
|
|
624
|
+
registry.register(
|
|
625
|
+
r'the agent "(?P<agent>[^"]+)" calls tool "(?P<tool>[^"]+)" with args (?P<args>.+)',
|
|
626
|
+
step_mock_agent_calls_tool_with_args,
|
|
627
|
+
)
|
|
628
|
+
|
|
629
|
+
registry.register(
|
|
630
|
+
r'the agent "(?P<agent>[^"]+)" returns data (?P<data>.+)',
|
|
631
|
+
step_mock_agent_returns_data,
|
|
632
|
+
)
|
|
633
|
+
|
|
634
|
+
registry.register(r"the message is (?P<message>.+)", step_set_scenario_message)
|
|
635
|
+
|
|
481
636
|
registry.register(r"the procedure run", step_procedure_runs)
|
|
482
637
|
|
|
483
638
|
registry.register(r"the procedure runs", step_procedure_runs)
|
|
@@ -490,6 +645,61 @@ def step_agent_takes_turn(context: Any, agent: str) -> None:
|
|
|
490
645
|
context.run_procedure()
|
|
491
646
|
|
|
492
647
|
|
|
648
|
+
def step_mock_agent_responds_with(
    context: Any, agent: str, message: str, when_message: str | None = None
) -> None:
    """Register a mocked reply for ``agent`` on the scenario context.

    ``message`` and, when given, ``when_message`` may be written as quoted
    string literals; their decoded text is forwarded to
    ``context.mock_agent_response``.

    Raises:
        AssertionError: if ``context`` has no ``mock_agent_response`` attribute.
    """
    reply, _ = _parse_step_string_literal(message)
    trigger = None if when_message is None else _parse_step_string_literal(when_message)[0]

    if hasattr(context, "mock_agent_response"):
        context.mock_agent_response(agent, reply, when_message=trigger)
        return
    raise AssertionError("Context does not support agent mocking")
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def step_set_scenario_message(context: Any, message: str) -> None:
    """Store the scenario's primary message on the context.

    ``message`` may be written as a quoted string literal; its decoded text is
    passed to ``context.set_scenario_message``.

    Raises:
        AssertionError: if ``context`` has no ``set_scenario_message`` attribute.
    """
    text, _ = _parse_step_string_literal(message)
    if hasattr(context, "set_scenario_message"):
        context.set_scenario_message(text)
        return
    raise AssertionError("Context does not support scenario message")
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def step_mock_agent_calls_tool_with_args(context: Any, agent: str, tool: str, args: str) -> None:
    """Register a mocked tool call made by ``agent`` on the scenario context.

    ``args`` must be a Python dict literal (optionally wrapped in quotes); the
    evaluated dict is forwarded to ``context.mock_agent_tool_call``.

    Raises:
        AssertionError: if ``args`` is not a valid literal, is not a dict, or
            ``context`` has no ``mock_agent_tool_call`` attribute.
    """
    literal_text, _ = _parse_step_string_literal(args)
    try:
        tool_args = ast.literal_eval(literal_text)
    except Exception:
        raise AssertionError(f"Invalid tool args literal: {args}")

    if not isinstance(tool_args, dict):
        raise AssertionError(f"Tool args must be an object/dict, got {type(tool_args).__name__}")

    if hasattr(context, "mock_agent_tool_call"):
        context.mock_agent_tool_call(agent, tool, tool_args)
        return
    raise AssertionError("Context does not support agent tool call mocking")
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def step_mock_agent_returns_data(context: Any, agent: str, data: str) -> None:
    """Register mocked structured data for ``agent`` on the scenario context.

    ``data`` must be a Python dict literal (optionally wrapped in quotes); the
    evaluated dict is forwarded to ``context.mock_agent_data``.

    Raises:
        AssertionError: if ``data`` is not a valid literal, is not a dict, or
            ``context`` has no ``mock_agent_data`` attribute.
    """
    literal_text, _ = _parse_step_string_literal(data)
    try:
        payload = ast.literal_eval(literal_text)
    except Exception:
        raise AssertionError(f"Invalid data literal: {data}")

    if not isinstance(payload, dict):
        raise AssertionError(f"Data must be an object/dict, got {type(payload).__name__}")

    if hasattr(context, "mock_agent_data"):
        context.mock_agent_data(agent, payload)
        return
    raise AssertionError("Context does not support agent data mocking")
|
|
701
|
+
|
|
702
|
+
|
|
493
703
|
def step_procedure_runs(context: Any) -> None:
|
|
494
704
|
"""Execute the procedure.
|
|
495
705
|
|
|
@@ -532,7 +742,7 @@ def register_regex_steps(registry: StepRegistry) -> None:
|
|
|
532
742
|
|
|
533
743
|
# Tool argument regex matching
|
|
534
744
|
registry.register(
|
|
535
|
-
r'the (?P<tool
|
|
745
|
+
r'the (?P<tool>[-\w]+) tool should be called with (?P<param>\w+) matching pattern "(?P<pattern>.+)"',
|
|
536
746
|
step_tool_arg_matches_pattern,
|
|
537
747
|
)
|
|
538
748
|
|
tactus/testing/test_runner.py
CHANGED
|
@@ -46,6 +46,8 @@ class TactusTestRunner:
|
|
|
46
46
|
procedure_file: Path,
|
|
47
47
|
mock_tools: Optional[Dict] = None,
|
|
48
48
|
params: Optional[Dict] = None,
|
|
49
|
+
mcp_servers: Optional[Dict] = None,
|
|
50
|
+
tool_paths: Optional[List[str]] = None,
|
|
49
51
|
mocked: bool = False,
|
|
50
52
|
):
|
|
51
53
|
if not BEHAVE_AVAILABLE:
|
|
@@ -54,6 +56,8 @@ class TactusTestRunner:
|
|
|
54
56
|
self.procedure_file = procedure_file
|
|
55
57
|
self.mock_tools = mock_tools or {}
|
|
56
58
|
self.params = params or {}
|
|
59
|
+
self.mcp_servers = mcp_servers or {}
|
|
60
|
+
self.tool_paths = tool_paths or []
|
|
57
61
|
self.mocked = mocked # Whether to use mocked dependencies
|
|
58
62
|
self.work_dir: Optional[Path] = None
|
|
59
63
|
self.parsed_feature: Optional[ParsedFeature] = None
|
|
@@ -83,6 +87,8 @@ class TactusTestRunner:
|
|
|
83
87
|
self.procedure_file,
|
|
84
88
|
mock_tools=self.mock_tools,
|
|
85
89
|
params=self.params,
|
|
90
|
+
mcp_servers=self.mcp_servers,
|
|
91
|
+
tool_paths=self.tool_paths,
|
|
86
92
|
mocked=self.mocked,
|
|
87
93
|
)
|
|
88
94
|
|
|
@@ -31,7 +31,6 @@ class TactusDSLVisitor(LuaParserVisitor):
|
|
|
31
31
|
"Procedure", # CamelCase
|
|
32
32
|
"Prompt", # CamelCase
|
|
33
33
|
"Hitl", # CamelCase
|
|
34
|
-
"Stages", # CamelCase
|
|
35
34
|
"Specification", # CamelCase
|
|
36
35
|
"Specifications", # CamelCase - Gherkin BDD specs
|
|
37
36
|
"Step", # CamelCase - Custom step definitions
|
|
@@ -467,17 +466,18 @@ class TactusDSLVisitor(LuaParserVisitor):
|
|
|
467
466
|
elif func_name == "Hitl": # CamelCase
|
|
468
467
|
if args and len(args) >= 2:
|
|
469
468
|
self.builder.register_hitl(args[0], args[1] if isinstance(args[1], dict) else {})
|
|
470
|
-
elif func_name == "Stages": # CamelCase
|
|
471
|
-
if args:
|
|
472
|
-
# Stages() can take multiple string arguments
|
|
473
|
-
self.builder.set_stages(args)
|
|
474
469
|
elif func_name == "Specification": # CamelCase
|
|
475
|
-
|
|
470
|
+
# Either:
|
|
471
|
+
# - Specification([[ Gherkin text ]]) (alias for Specifications)
|
|
472
|
+
# - Specification("name", { ... }) (structured form)
|
|
473
|
+
if args and len(args) == 1:
|
|
474
|
+
self.builder.register_specifications(args[0])
|
|
475
|
+
elif args and len(args) >= 2:
|
|
476
476
|
self.builder.register_specification(
|
|
477
477
|
args[0], args[1] if isinstance(args[1], list) else []
|
|
478
478
|
)
|
|
479
479
|
elif func_name == "Specifications": # CamelCase
|
|
480
|
-
# Specifications([[ Gherkin text ]])
|
|
480
|
+
# Specifications([[ Gherkin text ]]) (plural form; singular is Specification([[...]]))
|
|
481
481
|
if args and len(args) >= 1:
|
|
482
482
|
self.builder.register_specifications(args[0])
|
|
483
483
|
elif func_name == "Step": # CamelCase
|
|
@@ -485,11 +485,19 @@ class TactusDSLVisitor(LuaParserVisitor):
|
|
|
485
485
|
if args and len(args) >= 2:
|
|
486
486
|
self.builder.register_custom_step(args[0], args[1])
|
|
487
487
|
elif func_name == "Evaluation": # CamelCase
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
488
|
+
# Either:
|
|
489
|
+
# - Evaluation({ runs = 10, parallel = true }) (simple config)
|
|
490
|
+
# - Evaluation({ dataset = {...}, evaluators = {...}, ... }) (alias for Evaluations)
|
|
491
|
+
if args and len(args) >= 1 and isinstance(args[0], dict):
|
|
492
|
+
cfg = args[0]
|
|
493
|
+
if any(k in cfg for k in ("dataset", "dataset_file", "evaluators", "thresholds")):
|
|
494
|
+
self.builder.register_evaluations(cfg)
|
|
495
|
+
else:
|
|
496
|
+
self.builder.set_evaluation_config(cfg)
|
|
497
|
+
elif args and len(args) >= 1:
|
|
498
|
+
self.builder.set_evaluation_config({})
|
|
491
499
|
elif func_name == "Evaluations": # CamelCase
|
|
492
|
-
#
|
|
500
|
+
# Evaluation(s)({ dataset = {...}, evaluators = {...} })
|
|
493
501
|
if args and len(args) >= 1:
|
|
494
502
|
self.builder.register_evaluations(args[0] if isinstance(args[0], dict) else {})
|
|
495
503
|
elif func_name == "default_provider":
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tactus
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.31.1
|
|
4
4
|
Summary: Tactus: Lua-based DSL for agentic workflows
|
|
5
5
|
Project-URL: Homepage, https://github.com/AnthusAI/Tactus
|
|
6
6
|
Project-URL: Documentation, https://github.com/AnthusAI/Tactus/tree/main/docs
|
|
@@ -170,7 +170,7 @@ Procedure {
|
|
|
170
170
|
end
|
|
171
171
|
}
|
|
172
172
|
|
|
173
|
-
|
|
173
|
+
Specification([[
|
|
174
174
|
Feature: Research
|
|
175
175
|
Scenario: Completes research
|
|
176
176
|
When the researcher agent takes turns
|
|
@@ -421,12 +421,12 @@ calculator = Agent {
|
|
|
421
421
|
done = tactus.done
|
|
422
422
|
|
|
423
423
|
text_processor = Agent {
|
|
424
|
-
|
|
425
|
-
done,
|
|
424
|
+
inline_tools = {
|
|
426
425
|
{name = "uppercase", input = {...}, handler = function(args)
|
|
427
426
|
return string.upper(args.text)
|
|
428
427
|
end}
|
|
429
|
-
}
|
|
428
|
+
},
|
|
429
|
+
tools = {done}
|
|
430
430
|
}
|
|
431
431
|
```
|
|
432
432
|
|
|
@@ -533,7 +533,7 @@ Procedure {
|
|
|
533
533
|
end
|
|
534
534
|
}
|
|
535
535
|
|
|
536
|
-
|
|
536
|
+
Specification([[
|
|
537
537
|
Feature: Greeting
|
|
538
538
|
Scenario: Agent greets and completes
|
|
539
539
|
When the greeter agent takes turns
|
|
@@ -616,7 +616,7 @@ This creates a rhythm: **tool call → summarization → tool call → summariza
|
|
|
616
616
|
|
|
617
617
|
**Why this matters:**
|
|
618
618
|
|
|
619
|
-
Without per-call control, an agent might call another tool when you just want it to explain the previous result. By temporarily restricting
|
|
619
|
+
Without per-call control, an agent might call another tool when you just want it to explain the previous result. By temporarily restricting toolsets to an empty set (`tools = {}`), you ensure the agent focuses on summarization.
|
|
620
620
|
|
|
621
621
|
**Other per-call overrides:**
|
|
622
622
|
|
|
@@ -731,10 +731,9 @@ Then the search tool should be called exactly 2 times
|
|
|
731
731
|
Then the search tool should be called with query=test
|
|
732
732
|
```
|
|
733
733
|
|
|
734
|
-
**State
|
|
734
|
+
**State Steps:**
|
|
735
735
|
```gherkin
|
|
736
736
|
Given the procedure has started
|
|
737
|
-
Then the stage should be processing
|
|
738
737
|
Then the state count should be 5
|
|
739
738
|
Then the state error should exist
|
|
740
739
|
```
|
|
@@ -866,10 +865,9 @@ Then the search tool should be called exactly 2 times
|
|
|
866
865
|
Then the search tool should be called with query=test
|
|
867
866
|
```
|
|
868
867
|
|
|
869
|
-
**State
|
|
868
|
+
**State Steps:**
|
|
870
869
|
```gherkin
|
|
871
870
|
Given the procedure has started
|
|
872
|
-
Then the stage should be processing
|
|
873
871
|
Then the state count should be 5
|
|
874
872
|
Then the state error should exist
|
|
875
873
|
```
|