tactus-0.31.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +49 -0
- tactus/adapters/__init__.py +9 -0
- tactus/adapters/broker_log.py +76 -0
- tactus/adapters/cli_hitl.py +189 -0
- tactus/adapters/cli_log.py +223 -0
- tactus/adapters/cost_collector_log.py +56 -0
- tactus/adapters/file_storage.py +367 -0
- tactus/adapters/http_callback_log.py +109 -0
- tactus/adapters/ide_log.py +71 -0
- tactus/adapters/lua_tools.py +336 -0
- tactus/adapters/mcp.py +289 -0
- tactus/adapters/mcp_manager.py +196 -0
- tactus/adapters/memory.py +53 -0
- tactus/adapters/plugins.py +419 -0
- tactus/backends/http_backend.py +58 -0
- tactus/backends/model_backend.py +35 -0
- tactus/backends/pytorch_backend.py +110 -0
- tactus/broker/__init__.py +12 -0
- tactus/broker/client.py +247 -0
- tactus/broker/protocol.py +183 -0
- tactus/broker/server.py +1123 -0
- tactus/broker/stdio.py +12 -0
- tactus/cli/__init__.py +7 -0
- tactus/cli/app.py +2245 -0
- tactus/cli/commands/__init__.py +0 -0
- tactus/core/__init__.py +32 -0
- tactus/core/config_manager.py +790 -0
- tactus/core/dependencies/__init__.py +14 -0
- tactus/core/dependencies/registry.py +180 -0
- tactus/core/dsl_stubs.py +2117 -0
- tactus/core/exceptions.py +66 -0
- tactus/core/execution_context.py +480 -0
- tactus/core/lua_sandbox.py +508 -0
- tactus/core/message_history_manager.py +236 -0
- tactus/core/mocking.py +286 -0
- tactus/core/output_validator.py +291 -0
- tactus/core/registry.py +499 -0
- tactus/core/runtime.py +2907 -0
- tactus/core/template_resolver.py +142 -0
- tactus/core/yaml_parser.py +301 -0
- tactus/docker/Dockerfile +61 -0
- tactus/docker/entrypoint.sh +69 -0
- tactus/dspy/__init__.py +39 -0
- tactus/dspy/agent.py +1144 -0
- tactus/dspy/broker_lm.py +181 -0
- tactus/dspy/config.py +212 -0
- tactus/dspy/history.py +196 -0
- tactus/dspy/module.py +405 -0
- tactus/dspy/prediction.py +318 -0
- tactus/dspy/signature.py +185 -0
- tactus/formatting/__init__.py +7 -0
- tactus/formatting/formatter.py +437 -0
- tactus/ide/__init__.py +9 -0
- tactus/ide/coding_assistant.py +343 -0
- tactus/ide/server.py +2223 -0
- tactus/primitives/__init__.py +49 -0
- tactus/primitives/control.py +168 -0
- tactus/primitives/file.py +229 -0
- tactus/primitives/handles.py +378 -0
- tactus/primitives/host.py +94 -0
- tactus/primitives/human.py +342 -0
- tactus/primitives/json.py +189 -0
- tactus/primitives/log.py +187 -0
- tactus/primitives/message_history.py +157 -0
- tactus/primitives/model.py +163 -0
- tactus/primitives/procedure.py +564 -0
- tactus/primitives/procedure_callable.py +318 -0
- tactus/primitives/retry.py +155 -0
- tactus/primitives/session.py +152 -0
- tactus/primitives/state.py +182 -0
- tactus/primitives/step.py +209 -0
- tactus/primitives/system.py +93 -0
- tactus/primitives/tool.py +375 -0
- tactus/primitives/tool_handle.py +279 -0
- tactus/primitives/toolset.py +229 -0
- tactus/protocols/__init__.py +38 -0
- tactus/protocols/chat_recorder.py +81 -0
- tactus/protocols/config.py +97 -0
- tactus/protocols/cost.py +31 -0
- tactus/protocols/hitl.py +71 -0
- tactus/protocols/log_handler.py +27 -0
- tactus/protocols/models.py +355 -0
- tactus/protocols/result.py +33 -0
- tactus/protocols/storage.py +90 -0
- tactus/providers/__init__.py +13 -0
- tactus/providers/base.py +92 -0
- tactus/providers/bedrock.py +117 -0
- tactus/providers/google.py +105 -0
- tactus/providers/openai.py +98 -0
- tactus/sandbox/__init__.py +63 -0
- tactus/sandbox/config.py +171 -0
- tactus/sandbox/container_runner.py +1099 -0
- tactus/sandbox/docker_manager.py +433 -0
- tactus/sandbox/entrypoint.py +227 -0
- tactus/sandbox/protocol.py +213 -0
- tactus/stdlib/__init__.py +10 -0
- tactus/stdlib/io/__init__.py +13 -0
- tactus/stdlib/io/csv.py +88 -0
- tactus/stdlib/io/excel.py +136 -0
- tactus/stdlib/io/file.py +90 -0
- tactus/stdlib/io/fs.py +154 -0
- tactus/stdlib/io/hdf5.py +121 -0
- tactus/stdlib/io/json.py +109 -0
- tactus/stdlib/io/parquet.py +83 -0
- tactus/stdlib/io/tsv.py +88 -0
- tactus/stdlib/loader.py +274 -0
- tactus/stdlib/tac/tactus/tools/done.tac +33 -0
- tactus/stdlib/tac/tactus/tools/log.tac +50 -0
- tactus/testing/README.md +273 -0
- tactus/testing/__init__.py +61 -0
- tactus/testing/behave_integration.py +380 -0
- tactus/testing/context.py +486 -0
- tactus/testing/eval_models.py +114 -0
- tactus/testing/evaluation_runner.py +222 -0
- tactus/testing/evaluators.py +634 -0
- tactus/testing/events.py +94 -0
- tactus/testing/gherkin_parser.py +134 -0
- tactus/testing/mock_agent.py +315 -0
- tactus/testing/mock_dependencies.py +234 -0
- tactus/testing/mock_hitl.py +171 -0
- tactus/testing/mock_registry.py +168 -0
- tactus/testing/mock_tools.py +133 -0
- tactus/testing/models.py +115 -0
- tactus/testing/pydantic_eval_runner.py +508 -0
- tactus/testing/steps/__init__.py +13 -0
- tactus/testing/steps/builtin.py +902 -0
- tactus/testing/steps/custom.py +69 -0
- tactus/testing/steps/registry.py +68 -0
- tactus/testing/test_runner.py +489 -0
- tactus/tracing/__init__.py +5 -0
- tactus/tracing/trace_manager.py +417 -0
- tactus/utils/__init__.py +1 -0
- tactus/utils/cost_calculator.py +72 -0
- tactus/utils/model_pricing.py +132 -0
- tactus/utils/safe_file_library.py +502 -0
- tactus/utils/safe_libraries.py +234 -0
- tactus/validation/LuaLexerBase.py +66 -0
- tactus/validation/LuaParserBase.py +23 -0
- tactus/validation/README.md +224 -0
- tactus/validation/__init__.py +7 -0
- tactus/validation/error_listener.py +21 -0
- tactus/validation/generated/LuaLexer.interp +231 -0
- tactus/validation/generated/LuaLexer.py +5548 -0
- tactus/validation/generated/LuaLexer.tokens +124 -0
- tactus/validation/generated/LuaLexerBase.py +66 -0
- tactus/validation/generated/LuaParser.interp +173 -0
- tactus/validation/generated/LuaParser.py +6439 -0
- tactus/validation/generated/LuaParser.tokens +124 -0
- tactus/validation/generated/LuaParserBase.py +23 -0
- tactus/validation/generated/LuaParserVisitor.py +118 -0
- tactus/validation/generated/__init__.py +7 -0
- tactus/validation/grammar/LuaLexer.g4 +123 -0
- tactus/validation/grammar/LuaParser.g4 +178 -0
- tactus/validation/semantic_visitor.py +817 -0
- tactus/validation/validator.py +157 -0
- tactus-0.31.0.dist-info/METADATA +1809 -0
- tactus-0.31.0.dist-info/RECORD +160 -0
- tactus-0.31.0.dist-info/WHEEL +4 -0
- tactus-0.31.0.dist-info/entry_points.txt +2 -0
- tactus-0.31.0.dist-info/licenses/LICENSE +21 -0
tactus/testing/events.py
ADDED
@@ -0,0 +1,94 @@
+"""
+Structured log events for IDE integration.
+
+Provides Pydantic models for test and evaluation events
+that can be emitted as structured logs for IDE display.
+"""
+
+from datetime import datetime
+from typing import List
+from pydantic import BaseModel, Field
+
+from .models import TestResult, EvaluationResult
+
+
+class TestStartedEvent(BaseModel):
+    """Event emitted when tests start."""
+
+    event_type: str = "test_started"
+    procedure_file: str
+    total_scenarios: int
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class TestCompletedEvent(BaseModel):
+    """Event emitted when tests complete."""
+
+    event_type: str = "test_completed"
+    result: TestResult
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class TestScenarioStartedEvent(BaseModel):
+    """Event emitted when a scenario starts."""
+
+    event_type: str = "test_scenario_started"
+    scenario_name: str
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class TestScenarioCompletedEvent(BaseModel):
+    """Event emitted when a scenario completes."""
+
+    event_type: str = "test_scenario_completed"
+    scenario_name: str
+    status: str  # passed, failed, skipped
+    duration: float
+    total_cost: float = 0.0  # Total LLM cost for this scenario
+    total_tokens: int = 0  # Total tokens used in this scenario
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class EvaluationStartedEvent(BaseModel):
+    """Event emitted when evaluation starts."""
+
+    event_type: str = "evaluation_started"
+    procedure_file: str
+    total_scenarios: int
+    runs_per_scenario: int
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class EvaluationCompletedEvent(BaseModel):
+    """Event emitted when evaluation completes."""
+
+    event_type: str = "evaluation_completed"
+    results: List[EvaluationResult]
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class EvaluationScenarioStartedEvent(BaseModel):
+    """Event emitted when scenario evaluation starts."""
+
+    event_type: str = "evaluation_scenario_started"
+    scenario_name: str
+    runs: int
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class EvaluationScenarioCompletedEvent(BaseModel):
+    """Event emitted when scenario evaluation completes."""
+
+    event_type: str = "evaluation_scenario_completed"
+    result: EvaluationResult
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+class EvaluationProgressEvent(BaseModel):
+    """Event emitted during evaluation progress."""
+
+    event_type: str = "evaluation_progress"
+    scenario_name: str
+    completed_runs: int
+    total_runs: int
+    timestamp: datetime = Field(default_factory=datetime.now)
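Each event model above carries a string `event_type` discriminator and a self-populating timestamp, so a log sink can emit one JSON object per event and an IDE can route it without knowing the Python class. A minimal serialization sketch, assuming Pydantic v2 (`model_dump_json`; under Pydantic v1 the equivalent is `.json()`), with an illustrative scenario name:

from tactus.testing.events import TestScenarioCompletedEvent

# Build a completion event; total_cost/total_tokens default to 0 when omitted.
event = TestScenarioCompletedEvent(
    scenario_name="Empty cart",  # hypothetical scenario name
    status="passed",
    duration=1.25,
    total_cost=0.0042,
    total_tokens=380,
)

# One JSON line per event; event_type and timestamp are filled in by defaults.
print(event.model_dump_json())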
tactus/testing/gherkin_parser.py
ADDED
@@ -0,0 +1,134 @@
+"""
+Gherkin parser integration using gherkin-official library.
+"""
+
+import logging
+from typing import Optional
+
+try:
+    from gherkin.parser import Parser
+    from gherkin.token_scanner import TokenScanner
+
+    GHERKIN_AVAILABLE = True
+except ImportError:
+    GHERKIN_AVAILABLE = False
+
+from .models import ParsedStep, ParsedScenario, ParsedFeature
+
+
+logger = logging.getLogger(__name__)
+
+
+class GherkinParser:
+    """
+    Parses Gherkin text into structured Pydantic models.
+
+    Uses the official Gherkin parser library for accurate parsing.
+    """
+
+    def __init__(self):
+        if not GHERKIN_AVAILABLE:
+            raise ImportError(
+                "gherkin-official library not installed. Install with: pip install gherkin-official"
+            )
+        self.parser = Parser()
+
+    def parse(self, gherkin_text: str) -> ParsedFeature:
+        """
+        Parse Gherkin text into a ParsedFeature model.
+
+        Args:
+            gherkin_text: Raw Gherkin feature text
+
+        Returns:
+            ParsedFeature with all scenarios and steps
+
+        Raises:
+            ValueError: If Gherkin syntax is invalid
+        """
+        try:
+            scanner = TokenScanner(gherkin_text)
+            gherkin_document = self.parser.parse(scanner)
+
+            if not gherkin_document or not gherkin_document.get("feature"):
+                raise ValueError("No feature found in Gherkin text")
+
+            return self._convert_to_pydantic(gherkin_document)
+
+        except Exception as e:
+            logger.error(f"Failed to parse Gherkin: {e}")
+            raise ValueError(f"Invalid Gherkin syntax: {e}")
+
+    def _convert_to_pydantic(self, gherkin_document: dict) -> ParsedFeature:
+        """Convert Gherkin parser output to Pydantic models."""
+        feature_data = gherkin_document["feature"]
+
+        # Extract feature metadata
+        feature_name = feature_data.get("name", "Unnamed Feature")
+        feature_description = feature_data.get("description", "")
+        feature_tags = [tag["name"] for tag in feature_data.get("tags", [])]
+        feature_line = feature_data.get("location", {}).get("line")
+
+        # Parse scenarios
+        scenarios = []
+        for child in feature_data.get("children", []):
+            if child.get("scenario"):
+                scenario = self._parse_scenario(child["scenario"])
+                scenarios.append(scenario)
+
+        return ParsedFeature(
+            name=feature_name,
+            description=feature_description,
+            scenarios=scenarios,
+            tags=feature_tags,
+            line=feature_line,
+        )
+
+    def _parse_scenario(self, scenario_data: dict) -> ParsedScenario:
+        """Parse a scenario from Gherkin parser output."""
+        scenario_name = scenario_data.get("name", "Unnamed Scenario")
+        scenario_tags = [tag["name"] for tag in scenario_data.get("tags", [])]
+        scenario_line = scenario_data.get("location", {}).get("line")
+
+        # Parse steps
+        steps = []
+        for step_data in scenario_data.get("steps", []):
+            step = self._parse_step(step_data)
+            steps.append(step)
+
+        return ParsedScenario(
+            name=scenario_name,
+            tags=scenario_tags,
+            steps=steps,
+            line=scenario_line,
+        )
+
+    def _parse_step(self, step_data: dict) -> ParsedStep:
+        """Parse a step from Gherkin parser output."""
+        keyword = step_data.get("keyword", "").strip()
+        text = step_data.get("text", "")
+        line = step_data.get("location", {}).get("line")
+
+        return ParsedStep(
+            keyword=keyword,
+            message=text,
+            line=line,
+        )
+
+
+def parse_gherkin(gherkin_text: str) -> Optional[ParsedFeature]:
+    """
+    Convenience function to parse Gherkin text.
+
+    Args:
+        gherkin_text: Raw Gherkin feature text
+
+    Returns:
+        ParsedFeature or None if parsing fails
+    """
+    try:
+        parser = GherkinParser()
+        return parser.parse(gherkin_text)
+    except Exception as e:
+        logger.error(f"Failed to parse Gherkin: {e}")
+        return None
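A quick usage sketch of the `parse_gherkin` convenience function above. The feature text is illustrative; the attributes printed (`name`, `scenarios`, `steps`) follow the `ParsedFeature`/`ParsedScenario` constructor calls visible in the diff:

from tactus.testing.gherkin_parser import parse_gherkin

FEATURE = """\
Feature: Checkout
  Scenario: Empty cart
    Given the cart is empty
    When the user checks out
    Then an error is shown
"""

# Returns None on invalid Gherkin (or when gherkin-official is not installed).
feature = parse_gherkin(FEATURE)
if feature is not None:
    print(feature.name)  # "Checkout"
    for scenario in feature.scenarios:
        print(scenario.name, len(scenario.steps))  # "Empty cart" 3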
tactus/testing/mock_agent.py
ADDED
@@ -0,0 +1,315 @@
+"""
+Mock agent primitive for BDD testing.
+
+Provides mock agent that simulates turns without LLM calls.
+Uses agent mock configurations from Mocks {} in .tac files.
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class MockAgentResult:
+    """Result from a mock agent turn."""
+
+    def __init__(
+        self,
+        message: str = "",
+        tool_calls: Optional[List[Dict]] = None,
+        data: Optional[Dict[str, Any]] = None,
+        usage: Optional[Dict[str, Any]] = None,
+        new_messages: Optional[List[Dict[str, Any]]] = None,
+        lua_table_from: Optional[Any] = None,
+    ):
+        self.message = message
+        self.response = message
+        self.tool_calls = tool_calls or []
+        self.data = data or {}
+        self.usage = usage or {}
+        self.cost = 0.0
+        try:
+            self.tokens = int(self.usage.get("total_tokens", 0) or 0)
+        except Exception:
+            self.tokens = 0
+
+        self._new_messages = new_messages or []
+        self._lua_table_from = lua_table_from
+
+    def __repr__(self) -> str:
+        return (
+            f"MockAgentResult(message={self.message!r}, tool_calls={len(self.tool_calls)}, "
+            f"data_keys={len(self.data) if hasattr(self.data, '__len__') else 'n/a'})"
+        )
+
+    def new_messages(self):
+        """
+        Return messages generated in this turn.
+
+        In Lua, callers expect a table (for `#msgs` and 1-based indexing).
+        """
+        if self._lua_table_from is not None:
+            try:
+                return self._lua_table_from(self._new_messages)
+            except Exception:
+                # Fall back to raw Python list if conversion fails.
+                pass
+        return self._new_messages
+
+
+class MockAgentPrimitive:
+    """
+    Mock agent that simulates turns without making LLM calls.
+
+    Uses agent mock configurations from Mocks {} in .tac files.
+    The mock config specifies exactly which tool calls to simulate,
+    allowing tests to pass in CI without real LLM calls.
+
+    Example Mocks {} configuration:
+        Mocks {
+            my_agent = {
+                tool_calls = {
+                    {tool = "search", args = {query = "test"}},
+                    {tool = "done", args = {reason = "completed"}}
+                },
+                message = "I found the results."
+            }
+        }
+    """
+
+    def __init__(
+        self,
+        name: str,
+        tool_primitive: Any,
+        registry: Any = None,
+        mock_manager: Any = None,
+        lua_runtime: Any = None,
+        lua_table_from: Any = None,
+    ):
+        """
+        Initialize mock agent.
+
+        Args:
+            name: Agent name
+            tool_primitive: ToolPrimitive for recording tool calls
+            registry: Registry containing agent_mocks configuration
+            mock_manager: Optional MockManager (for tool response mocking)
+        """
+        self.name = name
+        self.tool_primitive = tool_primitive
+        self.registry = registry
+        self.mock_manager = mock_manager
+        self.turn_count = 0
+        if lua_table_from is not None:
+            self._lua_table_from = lua_table_from
+        elif lua_runtime is not None and hasattr(lua_runtime, "table_from"):
+            self._lua_table_from = lua_runtime.table_from
+        else:
+            self._lua_table_from = None
+
+    def turn(self, opts: Optional[Dict[str, Any]] = None) -> MockAgentResult:
+        """
+        Simulate an agent turn by executing configured tool calls.
+
+        Looks up agent mock config in registry.agent_mocks and executes
+        the specified tool calls, then returns the configured message.
+
+        Args:
+            opts: Optional turn options (for compatibility)
+
+        Returns:
+            MockAgentResult with message and tool call info
+
+        Raises:
+            ValueError: If no mock config is found for this agent
+        """
+        opts = opts or {}
+        self.turn_count += 1
+        logger.info(f"Mock agent turn: {self.name} (turn {self.turn_count})")
+
+        # Get agent mock config
+        mock_config = self._get_agent_mock_config()
+
+        if mock_config is None:
+            raise ValueError(
+                f"Agent '{self.name}' requires mock config in Mocks {{}}. "
+                f"Add a mock configuration like:\n"
+                f"Mocks {{\n"
+                f"  {self.name} = {{\n"
+                f"    tool_calls = {{\n"
+                f'      {{tool = "done", args = {{reason = "completed"}}}}\n'
+                f"    }},\n"
+                f'    message = "Task completed."\n'
+                f"  }}\n"
+                f"}}"
+            )
+
+        temporal_turns = getattr(mock_config, "temporal", None) or []
+        if temporal_turns:
+            injected = opts.get("message")
+
+            selected_turn = None
+            if injected is not None:
+                for turn in temporal_turns:
+                    if isinstance(turn, dict) and turn.get("when_message") == injected:
+                        selected_turn = turn
+                        break
+
+            if selected_turn is None:
+                idx = self.turn_count - 1  # 1-indexed turns
+                if idx < 0:
+                    idx = 0
+                if idx >= len(temporal_turns):
+                    idx = len(temporal_turns) - 1
+                selected_turn = temporal_turns[idx]
+
+            turn = selected_turn
+            if isinstance(turn, dict):
+                message = turn.get("message", mock_config.message)
+                tool_calls = turn.get("tool_calls", mock_config.tool_calls)
+                data = turn.get("data", mock_config.data)
+                raw_usage = turn.get("usage", mock_config.usage)
+            else:
+                message = mock_config.message
+                tool_calls = mock_config.tool_calls
+                data = mock_config.data
+                raw_usage = mock_config.usage
+        else:
+            message = mock_config.message
+            tool_calls = mock_config.tool_calls
+            data = mock_config.data
+            raw_usage = mock_config.usage
+
+        # Execute the configured tool calls
+        tool_calls_executed = self._execute_tool_calls(tool_calls)
+
+        # Structured payload (optional) for result.data
+        data = data or {}
+        if not data:
+            data = {"response": message}
+
+        # Token usage payload (optional) for result.usage
+        usage = dict(raw_usage) if isinstance(raw_usage, dict) else {}
+        prompt_tokens = int(usage.get("prompt_tokens", 0) or 0)
+        completion_tokens = int(usage.get("completion_tokens", 0) or 0)
+        total_tokens = usage.get("total_tokens")
+        if total_tokens is None:
+            total_tokens = prompt_tokens + completion_tokens
+        total_tokens = int(total_tokens or 0)
+        usage.setdefault("prompt_tokens", prompt_tokens)
+        usage.setdefault("completion_tokens", completion_tokens)
+        usage.setdefault("total_tokens", total_tokens)
+
+        # Messages generated in this turn
+        user_message = opts.get("message")
+        new_messages = []
+        if user_message:
+            new_messages.append({"role": "user", "content": user_message})
+        if message:
+            new_messages.append({"role": "assistant", "content": message})
+
+        # Return the configured message
+        return MockAgentResult(
+            message=message,
+            tool_calls=tool_calls_executed,
+            data=data,
+            usage=usage,
+            new_messages=new_messages,
+            lua_table_from=self._lua_table_from,
+        )
+
+    def _get_agent_mock_config(self) -> Optional[Any]:
+        """
+        Get agent mock config from registry.agent_mocks.
+
+        Returns:
+            AgentMockConfig if found, None otherwise
+        """
+        if not self.registry:
+            return None
+
+        # Check for agent mock in registry.agent_mocks
+        if hasattr(self.registry, "agent_mocks"):
+            return self.registry.agent_mocks.get(self.name)
+
+        return None
+
+    def _execute_tool_calls(self, tool_calls: List[Dict[str, Any]]) -> List[Dict]:
+        """
+        Execute the configured tool calls.
+
+        Records each tool call via the tool_primitive, which will
+        use mock responses from the MockManager if configured.
+
+        Args:
+            tool_calls: List of tool call configs [{tool: "name", args: {...}}, ...]
+
+        Returns:
+            List of executed tool calls with results
+        """
+        executed = []
+
+        for tool_call in tool_calls:
+            tool_name = tool_call.get("tool")
+            args = tool_call.get("args", {})
+
+            if not tool_name:
+                logger.warning(f"Skipping invalid tool call config: {tool_call}")
+                continue
+
+            logger.debug(f"Mock agent {self.name} executing tool call: {tool_name}({args})")
+
+            # Record the tool call via tool primitive
+            # MockedToolPrimitive.record_call(tool_name, args) returns the mock response
+            result = None
+            if self.tool_primitive:
+                try:
+                    # record_call returns the mock response and records the call
+                    result = self.tool_primitive.record_call(tool_name, args)
+                except Exception as e:
+                    logger.warning(f"Error recording tool call {tool_name}: {e}")
+                    result = {"status": "ok", "tool": tool_name}
+
+            executed.append(
+                {
+                    "tool": tool_name,
+                    "args": args,
+                    "result": result,
+                }
+            )
+
+        return executed
+
+    def __call__(self, inputs: Optional[Dict[str, Any]] = None) -> MockAgentResult:
+        """
+        Execute an agent turn using the callable interface.
+
+        This makes the mock agent callable like real agents:
+            result = worker({message = "Hello"})
+
+        Args:
+            inputs: Input dict (ignored in mock mode, tool calls are from config)
+
+        Returns:
+            MockAgentResult with response and tool call info
+        """
+        inputs = inputs or {}
+
+        # Convert Lua table to dict if needed
+        if hasattr(inputs, "items"):
+            try:
+                inputs = dict(inputs.items())
+            except (AttributeError, TypeError):
+                pass
+
+        # Extract message field for logging
+        message = inputs.get("message", "")
+        if message:
+            logger.debug(f"Mock agent {self.name} received message: {message}")
+
+        # Execute the turn (tool calls come from config, not inputs)
+        return self.turn(inputs)
+
+    def __repr__(self) -> str:
+        return f"MockAgentPrimitive({self.name}, turns={self.turn_count})"
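To make the control flow above concrete, here is a sketch that drives `MockAgentPrimitive` from plain Python, outside the Lua runtime. The stub registry and tool primitive are hypothetical test doubles; the only contracts assumed are the ones the diff itself exercises: `registry.agent_mocks`, a config object with `message`/`tool_calls`/`data`/`usage`/`temporal` attributes (the package's `AgentMockConfig`, normally built from a Mocks {} block in a .tac file), and `tool_primitive.record_call(name, args)`:

from types import SimpleNamespace

from tactus.testing.mock_agent import MockAgentPrimitive


class StubToolPrimitive:
    """Hypothetical stand-in: echoes a canned response for any tool call."""

    def record_call(self, tool_name, args):
        return {"status": "ok", "tool": tool_name}


# SimpleNamespace stands in for AgentMockConfig; attribute names match
# what MockAgentPrimitive.turn() reads off the config.
mock_config = SimpleNamespace(
    message="I found the results.",
    tool_calls=[
        {"tool": "search", "args": {"query": "test"}},
        {"tool": "done", "args": {"reason": "completed"}},
    ],
    data={},
    usage={"prompt_tokens": 12, "completion_tokens": 8},
    temporal=None,  # no per-turn scripting; the flat config is used every turn
)
registry = SimpleNamespace(agent_mocks={"my_agent": mock_config})

agent = MockAgentPrimitive("my_agent", StubToolPrimitive(), registry=registry)
result = agent({"message": "Hello"})

print(result.message)                # "I found the results."
print(len(result.tool_calls))        # 2 (search, then done)
print(result.usage["total_tokens"])  # 20, derived from prompt + completion

With a `temporal` list on the config, turn() instead picks a per-turn entry, matching `when_message` against the injected message first and falling back to the turn index, which is how multi-turn conversations are scripted.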