tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/lua_tools.py +23 -1
- tactus/adapters/mcp_manager.py +62 -35
- tactus/broker/server.py +314 -0
- tactus/cli/app.py +11 -1
- tactus/core/dsl_stubs.py +138 -41
- tactus/core/output_validator.py +69 -15
- tactus/core/registry.py +13 -25
- tactus/core/runtime.py +208 -69
- tactus/dspy/agent.py +87 -30
- tactus/ide/server.py +0 -10
- tactus/primitives/__init__.py +0 -2
- tactus/primitives/handles.py +8 -3
- tactus/primitives/procedure_callable.py +36 -0
- tactus/protocols/config.py +0 -5
- tactus/protocols/result.py +3 -3
- tactus/stdlib/tac/tactus/tools/done.tac +1 -1
- tactus/stdlib/tac/tactus/tools/log.tac +1 -1
- tactus/testing/README.md +1 -12
- tactus/testing/behave_integration.py +12 -2
- tactus/testing/context.py +156 -46
- tactus/testing/mock_agent.py +43 -8
- tactus/testing/steps/builtin.py +264 -54
- tactus/testing/test_runner.py +6 -0
- tactus/validation/semantic_visitor.py +19 -11
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/METADATA +9 -11
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/RECORD +30 -31
- tactus/primitives/stage.py +0 -202
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/WHEEL +0 -0
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/entry_points.txt +0 -0
- {tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/licenses/LICENSE +0 -0
tactus/primitives/__init__.py
CHANGED
|
@@ -10,7 +10,6 @@ from tactus.primitives.control import IterationsPrimitive, StopPrimitive
|
|
|
10
10
|
from tactus.primitives.tool import ToolPrimitive
|
|
11
11
|
from tactus.primitives.log import LogPrimitive
|
|
12
12
|
from tactus.primitives.step import StepPrimitive, CheckpointPrimitive
|
|
13
|
-
from tactus.primitives.stage import StagePrimitive
|
|
14
13
|
from tactus.primitives.json import JsonPrimitive
|
|
15
14
|
from tactus.primitives.retry import RetryPrimitive
|
|
16
15
|
from tactus.primitives.file import FilePrimitive
|
|
@@ -40,7 +39,6 @@ __all__ = [
|
|
|
40
39
|
"StepPrimitive",
|
|
41
40
|
"CheckpointPrimitive",
|
|
42
41
|
"MessageHistoryPrimitive",
|
|
43
|
-
"StagePrimitive",
|
|
44
42
|
"JsonPrimitive",
|
|
45
43
|
"RetryPrimitive",
|
|
46
44
|
"FilePrimitive",
|
tactus/primitives/handles.py
CHANGED
|
@@ -87,7 +87,7 @@ class AgentHandle:
|
|
|
87
87
|
self._execution_context: Optional[Any] = None
|
|
88
88
|
logger.debug(f"AgentHandle created for '{name}'")
|
|
89
89
|
|
|
90
|
-
def __call__(self, inputs=None):
|
|
90
|
+
def __call__(self, inputs=None):
|
|
91
91
|
"""
|
|
92
92
|
Execute an agent turn using the callable interface.
|
|
93
93
|
|
|
@@ -117,8 +117,13 @@ class AgentHandle:
|
|
|
117
117
|
f"This should not happen with immediate agent creation.\n"
|
|
118
118
|
f"Please report this as a bug with a minimal reproduction example."
|
|
119
119
|
)
|
|
120
|
-
# Convert Lua table to Python dict if needed
|
|
121
|
-
converted_inputs = _convert_lua_table(inputs) if inputs is not None else None
|
|
120
|
+
# Convert Lua table to Python dict if needed
|
|
121
|
+
converted_inputs = _convert_lua_table(inputs) if inputs is not None else None
|
|
122
|
+
|
|
123
|
+
# Convenience: allow shorthand string calls in Lua:
|
|
124
|
+
# World("Hello") == World({message = "Hello"})
|
|
125
|
+
if isinstance(converted_inputs, str):
|
|
126
|
+
converted_inputs = {"message": converted_inputs}
|
|
122
127
|
|
|
123
128
|
# If we have an execution context, checkpoint the agent call
|
|
124
129
|
logger.debug(
|
|
@@ -258,6 +258,42 @@ class ProcedureCallable:
|
|
|
258
258
|
Raises:
|
|
259
259
|
ValueError: If output is not a dict or missing required fields
|
|
260
260
|
"""
|
|
261
|
+
# If no output schema is declared, accept any return value.
|
|
262
|
+
if not self.output_schema:
|
|
263
|
+
return
|
|
264
|
+
|
|
265
|
+
# Scalar output schema support:
|
|
266
|
+
# output = field.string{...}
|
|
267
|
+
if (
|
|
268
|
+
isinstance(self.output_schema, dict)
|
|
269
|
+
and "type" in self.output_schema
|
|
270
|
+
and isinstance(self.output_schema.get("type"), str)
|
|
271
|
+
):
|
|
272
|
+
expected_type = self.output_schema.get("type")
|
|
273
|
+
if expected_type not in {"string", "number", "boolean", "object", "array"}:
|
|
274
|
+
# Not a scalar schema; treat as normal object schema.
|
|
275
|
+
expected_type = None
|
|
276
|
+
|
|
277
|
+
else:
|
|
278
|
+
expected_type = None
|
|
279
|
+
|
|
280
|
+
if expected_type is not None:
|
|
281
|
+
is_required = bool(self.output_schema.get("required", False))
|
|
282
|
+
if result is None and not is_required:
|
|
283
|
+
return
|
|
284
|
+
|
|
285
|
+
if expected_type == "string" and not isinstance(result, str):
|
|
286
|
+
raise ValueError(f"Procedure '{self.name}' must return string, got {type(result)}")
|
|
287
|
+
if expected_type == "number" and not isinstance(result, (int, float)):
|
|
288
|
+
raise ValueError(f"Procedure '{self.name}' must return number, got {type(result)}")
|
|
289
|
+
if expected_type == "boolean" and not isinstance(result, bool):
|
|
290
|
+
raise ValueError(f"Procedure '{self.name}' must return boolean, got {type(result)}")
|
|
291
|
+
if expected_type == "object" and not isinstance(result, dict):
|
|
292
|
+
raise ValueError(f"Procedure '{self.name}' must return object, got {type(result)}")
|
|
293
|
+
if expected_type == "array" and not isinstance(result, list):
|
|
294
|
+
raise ValueError(f"Procedure '{self.name}' must return array, got {type(result)}")
|
|
295
|
+
return
|
|
296
|
+
|
|
261
297
|
if not isinstance(result, dict):
|
|
262
298
|
raise ValueError(f"Procedure '{self.name}' must return dict, got {type(result)}")
|
|
263
299
|
|
tactus/protocols/config.py
CHANGED
|
@@ -89,11 +89,6 @@ class ProcedureConfig(BaseModel):
|
|
|
89
89
|
# HITL declarations
|
|
90
90
|
hitl: Dict[str, Any] = Field(default_factory=dict, description="Pre-defined HITL interactions")
|
|
91
91
|
|
|
92
|
-
# Stages (optional)
|
|
93
|
-
stages: List[str] = Field(
|
|
94
|
-
default_factory=list, description="Optional stage names for workflow progression"
|
|
95
|
-
)
|
|
96
|
-
|
|
97
92
|
# Sub-procedures (future)
|
|
98
93
|
procedures: Dict[str, Any] = Field(
|
|
99
94
|
default_factory=dict, description="Inline sub-procedure definitions (future feature)"
|
tactus/protocols/result.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Result object returned by cost-incurring primitives (e.g., Agents).
|
|
3
3
|
|
|
4
|
-
Standardizes on `result.
|
|
4
|
+
Standardizes on `result.output` for the returned data (string or structured).
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
@@ -17,12 +17,12 @@ class TactusResult(BaseModel):
|
|
|
17
17
|
"""
|
|
18
18
|
Standard Result wrapper for Lua and Python consumption.
|
|
19
19
|
|
|
20
|
-
- `
|
|
20
|
+
- `output`: The returned data (string or structured dict/list/etc.)
|
|
21
21
|
- `usage`: Token usage stats for the call that produced this result
|
|
22
22
|
- `cost_stats`: Cost stats for the call that produced this result
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
output: Any = Field(..., description="Result output (string or structured data)")
|
|
26
26
|
usage: UsageStats = Field(default_factory=UsageStats)
|
|
27
27
|
cost_stats: CostStats = Field(default_factory=CostStats)
|
|
28
28
|
|
tactus/testing/README.md
CHANGED
|
@@ -7,7 +7,7 @@ First-class Gherkin-style BDD testing integrated into the Tactus DSL.
|
|
|
7
7
|
The Tactus BDD Testing Framework allows you to write behavior-driven tests directly in your procedure files using Gherkin syntax. Tests are executed using Behave under the hood, with full support for:
|
|
8
8
|
|
|
9
9
|
- **Natural language specifications** - Write tests in plain English using Gherkin
|
|
10
|
-
- **Built-in step library** - Comprehensive steps for Tactus primitives (tools,
|
|
10
|
+
- **Built-in step library** - Comprehensive steps for Tactus primitives (tools, state, etc.)
|
|
11
11
|
- **Custom steps** - Define your own steps in Lua for advanced assertions
|
|
12
12
|
- **Parallel execution** - Run scenarios in parallel for fast feedback
|
|
13
13
|
- **Consistency evaluation** - Run tests multiple times to measure reliability
|
|
@@ -88,15 +88,6 @@ Then the search tool should be called exactly 2 times
|
|
|
88
88
|
Then the search tool should be called with query=test
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
-
### Stage Steps
|
|
92
|
-
|
|
93
|
-
```gherkin
|
|
94
|
-
Given the procedure has started
|
|
95
|
-
Then the stage should be processing
|
|
96
|
-
Then the stage should transition from planning to executing
|
|
97
|
-
Given we are in stage complete
|
|
98
|
-
```
|
|
99
|
-
|
|
100
91
|
### State Steps
|
|
101
92
|
|
|
102
93
|
```gherkin
|
|
@@ -280,5 +271,3 @@ See `examples/with-bdd-tests.tac` for a complete example with:
|
|
|
280
271
|
|
|
281
272
|
|
|
282
273
|
|
|
283
|
-
|
|
284
|
-
|
|
@@ -8,7 +8,7 @@ from parsed Gherkin and registered steps.
|
|
|
8
8
|
import logging
|
|
9
9
|
import tempfile
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import Dict, Optional
|
|
11
|
+
from typing import Dict, List, Optional
|
|
12
12
|
|
|
13
13
|
from .models import ParsedFeature, ParsedScenario
|
|
14
14
|
from .steps.registry import StepRegistry
|
|
@@ -222,6 +222,8 @@ class BehaveEnvironmentGenerator:
|
|
|
222
222
|
procedure_file: Path,
|
|
223
223
|
mock_tools: Optional[Dict] = None,
|
|
224
224
|
params: Optional[Dict] = None,
|
|
225
|
+
mcp_servers: Optional[Dict] = None,
|
|
226
|
+
tool_paths: Optional[List[str]] = None,
|
|
225
227
|
mocked: bool = False,
|
|
226
228
|
) -> Path:
|
|
227
229
|
"""
|
|
@@ -244,6 +246,8 @@ class BehaveEnvironmentGenerator:
|
|
|
244
246
|
|
|
245
247
|
mock_tools_json = json.dumps(mock_tools or {}).replace("'", "\\'")
|
|
246
248
|
params_json = json.dumps(params or {}).replace("'", "\\'")
|
|
249
|
+
mcp_servers_json = json.dumps(mcp_servers or {}).replace("'", "\\'")
|
|
250
|
+
tool_paths_json = json.dumps(tool_paths or []).replace("'", "\\'")
|
|
247
251
|
|
|
248
252
|
# Convert procedure_file to absolute path so it works from temp behave directory
|
|
249
253
|
absolute_procedure_file = Path(procedure_file).resolve()
|
|
@@ -278,6 +282,8 @@ class BehaveEnvironmentGenerator:
|
|
|
278
282
|
f.write(f" context.procedure_file = Path(r'{absolute_procedure_file}')\n")
|
|
279
283
|
f.write(f" context.mock_tools = json.loads('{mock_tools_json}')\n")
|
|
280
284
|
f.write(f" context.params = json.loads('{params_json}')\n")
|
|
285
|
+
f.write(f" context.mcp_servers = json.loads('{mcp_servers_json}')\n")
|
|
286
|
+
f.write(f" context.tool_paths = json.loads('{tool_paths_json}')\n")
|
|
281
287
|
f.write(f" context.mocked = {mocked}\n\n")
|
|
282
288
|
|
|
283
289
|
f.write("def before_scenario(context, scenario):\n")
|
|
@@ -291,6 +297,8 @@ class BehaveEnvironmentGenerator:
|
|
|
291
297
|
f.write(" procedure_file=context.procedure_file,\n")
|
|
292
298
|
f.write(" params=context.params,\n")
|
|
293
299
|
f.write(" mock_tools=context.mock_tools,\n")
|
|
300
|
+
f.write(" mcp_servers=context.mcp_servers,\n")
|
|
301
|
+
f.write(" tool_paths=context.tool_paths,\n")
|
|
294
302
|
f.write(" mocked=context.mocked,\n")
|
|
295
303
|
f.write(" )\n")
|
|
296
304
|
f.write(" \n")
|
|
@@ -330,6 +338,8 @@ def setup_behave_directory(
|
|
|
330
338
|
work_dir: Optional[Path] = None,
|
|
331
339
|
mock_tools: Optional[Dict] = None,
|
|
332
340
|
params: Optional[Dict] = None,
|
|
341
|
+
mcp_servers: Optional[Dict] = None,
|
|
342
|
+
tool_paths: Optional[List[str]] = None,
|
|
333
343
|
mocked: bool = False,
|
|
334
344
|
) -> Path:
|
|
335
345
|
"""
|
|
@@ -364,7 +374,7 @@ def setup_behave_directory(
|
|
|
364
374
|
|
|
365
375
|
# Generate environment.py with mock tools, params, and mocked flag
|
|
366
376
|
env_gen = BehaveEnvironmentGenerator()
|
|
367
|
-
env_gen.generate(work_dir, procedure_file, mock_tools, params, mocked)
|
|
377
|
+
env_gen.generate(work_dir, procedure_file, mock_tools, params, mcp_servers, tool_paths, mocked)
|
|
368
378
|
|
|
369
379
|
logger.info(f"Behave directory setup complete: {work_dir}")
|
|
370
380
|
return work_dir
|
tactus/testing/context.py
CHANGED
|
@@ -27,11 +27,15 @@ class TactusTestContext:
|
|
|
27
27
|
procedure_file: Path,
|
|
28
28
|
params: Optional[Dict] = None,
|
|
29
29
|
mock_tools: Optional[Dict] = None,
|
|
30
|
+
mcp_servers: Optional[Dict] = None,
|
|
31
|
+
tool_paths: Optional[List[str]] = None,
|
|
30
32
|
mocked: bool = False,
|
|
31
33
|
):
|
|
32
34
|
self.procedure_file = procedure_file
|
|
33
35
|
self.params = params or {}
|
|
34
36
|
self.mock_tools = mock_tools # tool_name -> mock_response
|
|
37
|
+
self.mcp_servers = mcp_servers or {}
|
|
38
|
+
self.tool_paths = tool_paths or []
|
|
35
39
|
self.mocked = mocked # Whether to use mocked dependencies
|
|
36
40
|
self.mock_registry = None # Unified mock registry for dependencies + HITL
|
|
37
41
|
self.runtime = None
|
|
@@ -41,6 +45,123 @@ class TactusTestContext:
|
|
|
41
45
|
self.total_cost: float = 0.0 # Track total cost
|
|
42
46
|
self.total_tokens: int = 0 # Track total tokens
|
|
43
47
|
self.cost_breakdown: List[Any] = [] # Track per-call costs
|
|
48
|
+
self._agent_mock_turns: Dict[str, List[Dict[str, Any]]] = {}
|
|
49
|
+
self._scenario_message: str | None = None
|
|
50
|
+
|
|
51
|
+
def set_scenario_message(self, message: str) -> None:
|
|
52
|
+
"""Set the scenario's primary injected message (for in-spec mocking coordination)."""
|
|
53
|
+
self._scenario_message = message
|
|
54
|
+
|
|
55
|
+
def get_scenario_message(self) -> str | None:
|
|
56
|
+
"""Get the scenario's primary injected message, if set."""
|
|
57
|
+
return self._scenario_message
|
|
58
|
+
|
|
59
|
+
def mock_agent_response(
|
|
60
|
+
self, agent: str, message: str, when_message: str | None = None
|
|
61
|
+
) -> None:
|
|
62
|
+
"""Add a mocked agent response for this scenario (temporal; 1 per agent turn).
|
|
63
|
+
|
|
64
|
+
If `when_message` is provided, the mock is selected when the agent is called
|
|
65
|
+
with that exact injected message.
|
|
66
|
+
"""
|
|
67
|
+
turn: Dict[str, Any] = {"message": message}
|
|
68
|
+
effective_when = when_message if when_message is not None else self._scenario_message
|
|
69
|
+
if effective_when is not None:
|
|
70
|
+
turn["when_message"] = effective_when
|
|
71
|
+
self._agent_mock_turns.setdefault(agent, []).append(turn)
|
|
72
|
+
|
|
73
|
+
# Ensure runtime exists and sees the same dict reference for this scenario.
|
|
74
|
+
if self.runtime is None:
|
|
75
|
+
self.setup_runtime()
|
|
76
|
+
if self.runtime is not None:
|
|
77
|
+
self.runtime.external_agent_mocks = self._agent_mock_turns
|
|
78
|
+
|
|
79
|
+
def mock_agent_tool_call(
|
|
80
|
+
self,
|
|
81
|
+
agent: str,
|
|
82
|
+
tool: str,
|
|
83
|
+
args: Dict[str, Any] | None = None,
|
|
84
|
+
when_message: str | None = None,
|
|
85
|
+
) -> None:
|
|
86
|
+
"""Add a mocked tool call to an agent's next mocked turn for this scenario."""
|
|
87
|
+
args = args or {}
|
|
88
|
+
|
|
89
|
+
effective_when = when_message if when_message is not None else self._scenario_message
|
|
90
|
+
if (
|
|
91
|
+
agent in self._agent_mock_turns
|
|
92
|
+
and self._agent_mock_turns[agent]
|
|
93
|
+
and (
|
|
94
|
+
effective_when is None
|
|
95
|
+
or self._agent_mock_turns[agent][-1].get("when_message") == effective_when
|
|
96
|
+
)
|
|
97
|
+
):
|
|
98
|
+
turn = self._agent_mock_turns[agent][-1]
|
|
99
|
+
else:
|
|
100
|
+
turn = {}
|
|
101
|
+
if effective_when is not None:
|
|
102
|
+
turn["when_message"] = effective_when
|
|
103
|
+
self._agent_mock_turns.setdefault(agent, []).append(turn)
|
|
104
|
+
|
|
105
|
+
tool_calls = turn.get("tool_calls")
|
|
106
|
+
if not isinstance(tool_calls, list):
|
|
107
|
+
tool_calls = []
|
|
108
|
+
turn["tool_calls"] = tool_calls
|
|
109
|
+
|
|
110
|
+
tool_calls.append({"tool": tool, "args": args})
|
|
111
|
+
|
|
112
|
+
if self.runtime is None:
|
|
113
|
+
self.setup_runtime()
|
|
114
|
+
if self.runtime is not None:
|
|
115
|
+
self.runtime.external_agent_mocks = self._agent_mock_turns
|
|
116
|
+
|
|
117
|
+
def mock_agent_data(
|
|
118
|
+
self, agent: str, data: Dict[str, Any], when_message: str | None = None
|
|
119
|
+
) -> None:
|
|
120
|
+
"""Set structured output mock data for an agent's next mocked turn.
|
|
121
|
+
|
|
122
|
+
This is only used when an agent has an output schema; the DSPy agent mock
|
|
123
|
+
logic will apply `data` as the structured `result.output`.
|
|
124
|
+
"""
|
|
125
|
+
if not isinstance(data, dict):
|
|
126
|
+
raise TypeError("mock_agent_data expects a dict")
|
|
127
|
+
|
|
128
|
+
effective_when = when_message if when_message is not None else self._scenario_message
|
|
129
|
+
if (
|
|
130
|
+
agent in self._agent_mock_turns
|
|
131
|
+
and self._agent_mock_turns[agent]
|
|
132
|
+
and (
|
|
133
|
+
effective_when is None
|
|
134
|
+
or self._agent_mock_turns[agent][-1].get("when_message") == effective_when
|
|
135
|
+
)
|
|
136
|
+
):
|
|
137
|
+
turn = self._agent_mock_turns[agent][-1]
|
|
138
|
+
else:
|
|
139
|
+
turn = {}
|
|
140
|
+
if effective_when is not None:
|
|
141
|
+
turn["when_message"] = effective_when
|
|
142
|
+
self._agent_mock_turns.setdefault(agent, []).append(turn)
|
|
143
|
+
|
|
144
|
+
turn["data"] = data
|
|
145
|
+
|
|
146
|
+
if self.runtime is None:
|
|
147
|
+
self.setup_runtime()
|
|
148
|
+
if self.runtime is not None:
|
|
149
|
+
self.runtime.external_agent_mocks = self._agent_mock_turns
|
|
150
|
+
|
|
151
|
+
def mock_tool_returns(self, tool: str, output: Any) -> None:
|
|
152
|
+
"""Configure a runtime tool mock (Mocks { tool = { returns = ... } } equivalent)."""
|
|
153
|
+
if self.runtime is None:
|
|
154
|
+
self.setup_runtime()
|
|
155
|
+
if self.runtime is None:
|
|
156
|
+
raise AssertionError("Runtime not initialized")
|
|
157
|
+
|
|
158
|
+
if self.runtime.mock_manager is None:
|
|
159
|
+
from tactus.core.mocking import MockManager
|
|
160
|
+
|
|
161
|
+
self.runtime.mock_manager = MockManager()
|
|
162
|
+
|
|
163
|
+
self.runtime.mock_manager.register_mock(tool, {"output": output})
|
|
164
|
+
self.runtime.mock_manager.enable_mock(tool)
|
|
44
165
|
|
|
45
166
|
def setup_runtime(self) -> None:
|
|
46
167
|
"""Initialize TactusRuntime with storage and handlers."""
|
|
@@ -80,6 +201,8 @@ class TactusTestContext:
|
|
|
80
201
|
openai_api_key=os.environ.get("OPENAI_API_KEY"), # Pass API key for real LLM calls
|
|
81
202
|
log_handler=log_handler, # Enable cost tracking
|
|
82
203
|
source_file_path=str(self.procedure_file.resolve()), # For require() path resolution
|
|
204
|
+
mcp_servers=self.mcp_servers,
|
|
205
|
+
tool_paths=self.tool_paths,
|
|
83
206
|
)
|
|
84
207
|
|
|
85
208
|
# Create MockManager for handling Mocks {} blocks when in mocked mode
|
|
@@ -88,6 +211,8 @@ class TactusTestContext:
|
|
|
88
211
|
|
|
89
212
|
self.runtime.mock_manager = MockManager()
|
|
90
213
|
logger.info("Created MockManager for Mocks {} block support")
|
|
214
|
+
# Mocked-mode tests should never call real LLMs by default.
|
|
215
|
+
self.runtime.mock_all_agents = True
|
|
91
216
|
|
|
92
217
|
logger.debug(f"Setup runtime for test: {self.procedure_file.stem}")
|
|
93
218
|
|
|
@@ -184,12 +309,6 @@ class TactusTestContext:
|
|
|
184
309
|
except Exception as e:
|
|
185
310
|
logger.debug(f"Could not capture Tool primitive: {e}")
|
|
186
311
|
|
|
187
|
-
# Capture Stage primitive
|
|
188
|
-
try:
|
|
189
|
-
self._primitives["stage"] = self.runtime.stage_primitive
|
|
190
|
-
except Exception as e:
|
|
191
|
-
logger.debug(f"Could not capture Stage primitive: {e}")
|
|
192
|
-
|
|
193
312
|
# Capture State primitive
|
|
194
313
|
try:
|
|
195
314
|
self._primitives["state"] = self.runtime.state_primitive
|
|
@@ -243,36 +362,6 @@ class TactusTestContext:
|
|
|
243
362
|
]
|
|
244
363
|
return []
|
|
245
364
|
|
|
246
|
-
# Stage-related methods
|
|
247
|
-
|
|
248
|
-
def current_stage(self) -> Optional[str]:
|
|
249
|
-
"""Get current stage."""
|
|
250
|
-
stage_prim = self._primitives.get("stage")
|
|
251
|
-
if stage_prim:
|
|
252
|
-
return stage_prim.current()
|
|
253
|
-
return None
|
|
254
|
-
|
|
255
|
-
def stage_history(self) -> List[str]:
|
|
256
|
-
"""Get stage transition history as list of stage names."""
|
|
257
|
-
stage_prim = self._primitives.get("stage")
|
|
258
|
-
if stage_prim and hasattr(stage_prim, "_history"):
|
|
259
|
-
# Extract just the stage names from history
|
|
260
|
-
stages = []
|
|
261
|
-
for transition in stage_prim._history:
|
|
262
|
-
if transition.get("from_stage"):
|
|
263
|
-
stages.append(transition["from_stage"])
|
|
264
|
-
if transition.get("to_stage"):
|
|
265
|
-
stages.append(transition["to_stage"])
|
|
266
|
-
# Remove duplicates while preserving order
|
|
267
|
-
seen = set()
|
|
268
|
-
result = []
|
|
269
|
-
for stage in stages:
|
|
270
|
-
if stage not in seen:
|
|
271
|
-
seen.add(stage)
|
|
272
|
-
result.append(stage)
|
|
273
|
-
return result
|
|
274
|
-
return []
|
|
275
|
-
|
|
276
365
|
# State-related methods
|
|
277
366
|
|
|
278
367
|
def state_get(self, key: str) -> Any:
|
|
@@ -296,12 +385,16 @@ class TactusTestContext:
|
|
|
296
385
|
if self.execution_result:
|
|
297
386
|
# Check if outputs are in a dedicated field
|
|
298
387
|
if "output" in self.execution_result:
|
|
299
|
-
|
|
388
|
+
output = self.execution_result["output"]
|
|
389
|
+
if isinstance(output, dict):
|
|
390
|
+
return output.get(key)
|
|
391
|
+
return None
|
|
300
392
|
# Otherwise check in the result dict (procedure return value)
|
|
301
|
-
if "result" in self.execution_result
|
|
302
|
-
self.execution_result["result"]
|
|
303
|
-
|
|
304
|
-
|
|
393
|
+
if "result" in self.execution_result:
|
|
394
|
+
result = self.execution_result["result"]
|
|
395
|
+
if isinstance(result, dict):
|
|
396
|
+
return result.get(key)
|
|
397
|
+
|
|
305
398
|
return None
|
|
306
399
|
|
|
307
400
|
def output_exists(self, key: str) -> bool:
|
|
@@ -309,14 +402,31 @@ class TactusTestContext:
|
|
|
309
402
|
if self.execution_result:
|
|
310
403
|
# Check if outputs are in a dedicated field
|
|
311
404
|
if "output" in self.execution_result:
|
|
312
|
-
|
|
405
|
+
output = self.execution_result["output"]
|
|
406
|
+
return isinstance(output, dict) and key in output
|
|
313
407
|
# Otherwise check in the result dict (procedure return value)
|
|
314
|
-
if "result" in self.execution_result
|
|
315
|
-
self.execution_result["result"]
|
|
316
|
-
|
|
317
|
-
|
|
408
|
+
if "result" in self.execution_result:
|
|
409
|
+
result = self.execution_result["result"]
|
|
410
|
+
if isinstance(result, dict):
|
|
411
|
+
return key in result
|
|
318
412
|
return False
|
|
319
413
|
|
|
414
|
+
def output_value(self) -> Any:
|
|
415
|
+
"""Get the full (possibly scalar) output value for the procedure."""
|
|
416
|
+
if not self.execution_result:
|
|
417
|
+
return None
|
|
418
|
+
if "output" in self.execution_result:
|
|
419
|
+
return self.execution_result["output"]
|
|
420
|
+
result = self.execution_result.get("result")
|
|
421
|
+
try:
|
|
422
|
+
from tactus.protocols.result import TactusResult
|
|
423
|
+
|
|
424
|
+
if isinstance(result, TactusResult):
|
|
425
|
+
return result.output
|
|
426
|
+
except Exception:
|
|
427
|
+
pass
|
|
428
|
+
return result
|
|
429
|
+
|
|
320
430
|
# Completion methods
|
|
321
431
|
|
|
322
432
|
def stop_success(self) -> bool:
|
tactus/testing/mock_agent.py
CHANGED
|
@@ -145,16 +145,51 @@ class MockAgentPrimitive:
|
|
|
145
145
|
f"}}"
|
|
146
146
|
)
|
|
147
147
|
|
|
148
|
+
temporal_turns = getattr(mock_config, "temporal", None) or []
|
|
149
|
+
if temporal_turns:
|
|
150
|
+
injected = opts.get("message")
|
|
151
|
+
|
|
152
|
+
selected_turn = None
|
|
153
|
+
if injected is not None:
|
|
154
|
+
for turn in temporal_turns:
|
|
155
|
+
if isinstance(turn, dict) and turn.get("when_message") == injected:
|
|
156
|
+
selected_turn = turn
|
|
157
|
+
break
|
|
158
|
+
|
|
159
|
+
if selected_turn is None:
|
|
160
|
+
idx = self.turn_count - 1 # 1-indexed turns
|
|
161
|
+
if idx < 0:
|
|
162
|
+
idx = 0
|
|
163
|
+
if idx >= len(temporal_turns):
|
|
164
|
+
idx = len(temporal_turns) - 1
|
|
165
|
+
selected_turn = temporal_turns[idx]
|
|
166
|
+
|
|
167
|
+
turn = selected_turn
|
|
168
|
+
if isinstance(turn, dict):
|
|
169
|
+
message = turn.get("message", mock_config.message)
|
|
170
|
+
tool_calls = turn.get("tool_calls", mock_config.tool_calls)
|
|
171
|
+
data = turn.get("data", mock_config.data)
|
|
172
|
+
raw_usage = turn.get("usage", mock_config.usage)
|
|
173
|
+
else:
|
|
174
|
+
message = mock_config.message
|
|
175
|
+
tool_calls = mock_config.tool_calls
|
|
176
|
+
data = mock_config.data
|
|
177
|
+
raw_usage = mock_config.usage
|
|
178
|
+
else:
|
|
179
|
+
message = mock_config.message
|
|
180
|
+
tool_calls = mock_config.tool_calls
|
|
181
|
+
data = mock_config.data
|
|
182
|
+
raw_usage = mock_config.usage
|
|
183
|
+
|
|
148
184
|
# Execute the configured tool calls
|
|
149
|
-
tool_calls_executed = self._execute_tool_calls(
|
|
185
|
+
tool_calls_executed = self._execute_tool_calls(tool_calls)
|
|
150
186
|
|
|
151
187
|
# Structured payload (optional) for result.data
|
|
152
|
-
data =
|
|
188
|
+
data = data or {}
|
|
153
189
|
if not data:
|
|
154
|
-
data = {"response":
|
|
190
|
+
data = {"response": message}
|
|
155
191
|
|
|
156
192
|
# Token usage payload (optional) for result.usage
|
|
157
|
-
raw_usage = getattr(mock_config, "usage", None) or {}
|
|
158
193
|
usage = dict(raw_usage) if isinstance(raw_usage, dict) else {}
|
|
159
194
|
prompt_tokens = int(usage.get("prompt_tokens", 0) or 0)
|
|
160
195
|
completion_tokens = int(usage.get("completion_tokens", 0) or 0)
|
|
@@ -167,16 +202,16 @@ class MockAgentPrimitive:
|
|
|
167
202
|
usage.setdefault("total_tokens", total_tokens)
|
|
168
203
|
|
|
169
204
|
# Messages generated in this turn
|
|
170
|
-
user_message = opts.get("message")
|
|
205
|
+
user_message = opts.get("message")
|
|
171
206
|
new_messages = []
|
|
172
207
|
if user_message:
|
|
173
208
|
new_messages.append({"role": "user", "content": user_message})
|
|
174
|
-
if
|
|
175
|
-
new_messages.append({"role": "assistant", "content":
|
|
209
|
+
if message:
|
|
210
|
+
new_messages.append({"role": "assistant", "content": message})
|
|
176
211
|
|
|
177
212
|
# Return the configured message
|
|
178
213
|
return MockAgentResult(
|
|
179
|
-
message=
|
|
214
|
+
message=message,
|
|
180
215
|
tool_calls=tool_calls_executed,
|
|
181
216
|
data=data,
|
|
182
217
|
usage=usage,
|