PyPI - tactus - Versions diffs - 0.31.2__py3-none-any.whl - Mend

tactus 0.31.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (160) hide show

tactus/__init__.py +49 -0
tactus/adapters/__init__.py +9 -0
tactus/adapters/broker_log.py +76 -0
tactus/adapters/cli_hitl.py +189 -0
tactus/adapters/cli_log.py +223 -0
tactus/adapters/cost_collector_log.py +56 -0
tactus/adapters/file_storage.py +367 -0
tactus/adapters/http_callback_log.py +109 -0
tactus/adapters/ide_log.py +71 -0
tactus/adapters/lua_tools.py +336 -0
tactus/adapters/mcp.py +289 -0
tactus/adapters/mcp_manager.py +196 -0
tactus/adapters/memory.py +53 -0
tactus/adapters/plugins.py +419 -0
tactus/backends/http_backend.py +58 -0
tactus/backends/model_backend.py +35 -0
tactus/backends/pytorch_backend.py +110 -0
tactus/broker/__init__.py +12 -0
tactus/broker/client.py +247 -0
tactus/broker/protocol.py +183 -0
tactus/broker/server.py +1123 -0
tactus/broker/stdio.py +12 -0
tactus/cli/__init__.py +7 -0
tactus/cli/app.py +2245 -0
tactus/cli/commands/__init__.py +0 -0
tactus/core/__init__.py +32 -0
tactus/core/config_manager.py +790 -0
tactus/core/dependencies/__init__.py +14 -0
tactus/core/dependencies/registry.py +180 -0
tactus/core/dsl_stubs.py +2117 -0
tactus/core/exceptions.py +66 -0
tactus/core/execution_context.py +480 -0
tactus/core/lua_sandbox.py +508 -0
tactus/core/message_history_manager.py +236 -0
tactus/core/mocking.py +286 -0
tactus/core/output_validator.py +291 -0
tactus/core/registry.py +499 -0
tactus/core/runtime.py +2907 -0
tactus/core/template_resolver.py +142 -0
tactus/core/yaml_parser.py +301 -0
tactus/docker/Dockerfile +61 -0
tactus/docker/entrypoint.sh +69 -0
tactus/dspy/__init__.py +39 -0
tactus/dspy/agent.py +1144 -0
tactus/dspy/broker_lm.py +181 -0
tactus/dspy/config.py +212 -0
tactus/dspy/history.py +196 -0
tactus/dspy/module.py +405 -0
tactus/dspy/prediction.py +318 -0
tactus/dspy/signature.py +185 -0
tactus/formatting/__init__.py +7 -0
tactus/formatting/formatter.py +437 -0
tactus/ide/__init__.py +9 -0
tactus/ide/coding_assistant.py +343 -0
tactus/ide/server.py +2223 -0
tactus/primitives/__init__.py +49 -0
tactus/primitives/control.py +168 -0
tactus/primitives/file.py +229 -0
tactus/primitives/handles.py +378 -0
tactus/primitives/host.py +94 -0
tactus/primitives/human.py +342 -0
tactus/primitives/json.py +189 -0
tactus/primitives/log.py +187 -0
tactus/primitives/message_history.py +157 -0
tactus/primitives/model.py +163 -0
tactus/primitives/procedure.py +564 -0
tactus/primitives/procedure_callable.py +318 -0
tactus/primitives/retry.py +155 -0
tactus/primitives/session.py +152 -0
tactus/primitives/state.py +182 -0
tactus/primitives/step.py +209 -0
tactus/primitives/system.py +93 -0
tactus/primitives/tool.py +375 -0
tactus/primitives/tool_handle.py +279 -0
tactus/primitives/toolset.py +229 -0
tactus/protocols/__init__.py +38 -0
tactus/protocols/chat_recorder.py +81 -0
tactus/protocols/config.py +97 -0
tactus/protocols/cost.py +31 -0
tactus/protocols/hitl.py +71 -0
tactus/protocols/log_handler.py +27 -0
tactus/protocols/models.py +355 -0
tactus/protocols/result.py +33 -0
tactus/protocols/storage.py +90 -0
tactus/providers/__init__.py +13 -0
tactus/providers/base.py +92 -0
tactus/providers/bedrock.py +117 -0
tactus/providers/google.py +105 -0
tactus/providers/openai.py +98 -0
tactus/sandbox/__init__.py +63 -0
tactus/sandbox/config.py +171 -0
tactus/sandbox/container_runner.py +1099 -0
tactus/sandbox/docker_manager.py +433 -0
tactus/sandbox/entrypoint.py +227 -0
tactus/sandbox/protocol.py +213 -0
tactus/stdlib/__init__.py +10 -0
tactus/stdlib/io/__init__.py +13 -0
tactus/stdlib/io/csv.py +88 -0
tactus/stdlib/io/excel.py +136 -0
tactus/stdlib/io/file.py +90 -0
tactus/stdlib/io/fs.py +154 -0
tactus/stdlib/io/hdf5.py +121 -0
tactus/stdlib/io/json.py +109 -0
tactus/stdlib/io/parquet.py +83 -0
tactus/stdlib/io/tsv.py +88 -0
tactus/stdlib/loader.py +274 -0
tactus/stdlib/tac/tactus/tools/done.tac +33 -0
tactus/stdlib/tac/tactus/tools/log.tac +50 -0
tactus/testing/README.md +273 -0
tactus/testing/__init__.py +61 -0
tactus/testing/behave_integration.py +380 -0
tactus/testing/context.py +486 -0
tactus/testing/eval_models.py +114 -0
tactus/testing/evaluation_runner.py +222 -0
tactus/testing/evaluators.py +634 -0
tactus/testing/events.py +94 -0
tactus/testing/gherkin_parser.py +134 -0
tactus/testing/mock_agent.py +315 -0
tactus/testing/mock_dependencies.py +234 -0
tactus/testing/mock_hitl.py +171 -0
tactus/testing/mock_registry.py +168 -0
tactus/testing/mock_tools.py +133 -0
tactus/testing/models.py +115 -0
tactus/testing/pydantic_eval_runner.py +508 -0
tactus/testing/steps/__init__.py +13 -0
tactus/testing/steps/builtin.py +902 -0
tactus/testing/steps/custom.py +69 -0
tactus/testing/steps/registry.py +68 -0
tactus/testing/test_runner.py +489 -0
tactus/tracing/__init__.py +5 -0
tactus/tracing/trace_manager.py +417 -0
tactus/utils/__init__.py +1 -0
tactus/utils/cost_calculator.py +72 -0
tactus/utils/model_pricing.py +132 -0
tactus/utils/safe_file_library.py +502 -0
tactus/utils/safe_libraries.py +234 -0
tactus/validation/LuaLexerBase.py +66 -0
tactus/validation/LuaParserBase.py +23 -0
tactus/validation/README.md +224 -0
tactus/validation/__init__.py +7 -0
tactus/validation/error_listener.py +21 -0
tactus/validation/generated/LuaLexer.interp +231 -0
tactus/validation/generated/LuaLexer.py +5548 -0
tactus/validation/generated/LuaLexer.tokens +124 -0
tactus/validation/generated/LuaLexerBase.py +66 -0
tactus/validation/generated/LuaParser.interp +173 -0
tactus/validation/generated/LuaParser.py +6439 -0
tactus/validation/generated/LuaParser.tokens +124 -0
tactus/validation/generated/LuaParserBase.py +23 -0
tactus/validation/generated/LuaParserVisitor.py +118 -0
tactus/validation/generated/__init__.py +7 -0
tactus/validation/grammar/LuaLexer.g4 +123 -0
tactus/validation/grammar/LuaParser.g4 +178 -0
tactus/validation/semantic_visitor.py +817 -0
tactus/validation/validator.py +157 -0
tactus-0.31.2.dist-info/METADATA +1809 -0
tactus-0.31.2.dist-info/RECORD +160 -0
tactus-0.31.2.dist-info/WHEEL +4 -0
tactus-0.31.2.dist-info/entry_points.txt +2 -0
tactus-0.31.2.dist-info/licenses/LICENSE +21 -0

tactus/testing/mock_dependencies.py ADDED Viewed

@@ -0,0 +1,234 @@
+"""
+Mock implementations of dependencies for testing.
+Provides fake HTTP clients, databases, etc. that can be used in BDD tests
+without making real network calls or database connections.
+"""
+import logging
+from typing import Dict, Any, Optional, List
+from dataclasses import dataclass
+logger = logging.getLogger(__name__)
@dataclass
class MockResponse:
    """
    Mock HTTP response returned by the mock HTTP client.

    Only the pieces of a response object that the mocks populate are modelled:
    the body text, a status code, and a headers mapping.
    """

    # Raw response body.
    text: str
    # HTTP status code carried by the response.
    status_code: int = 200
    # Response headers. ``None`` is accepted and replaced by a fresh dict in
    # ``__post_init__`` so instances never share one mutable default.
    headers: Optional[Dict[str, str]] = None

    def __post_init__(self):
        """Replace a missing ``headers`` value with a new, per-instance dict."""
        if self.headers is None:
            self.headers = {}

    def json(self):
        """
        Parse the response body as JSON.

        Returns:
            The decoded JSON value of ``text``.

        Raises:
            json.JSONDecodeError: If ``text`` is not valid JSON.
        """
        import json

        return json.loads(self.text)
class MockHTTPClient:
    """
    Mock HTTP client that returns pre-configured responses.

    Used in tests to avoid making real HTTP calls. Every request is recorded in
    ``calls`` as a ``(method, path, kwargs)`` tuple so tests can assert on it.

    Response lookup order for a requested path:
        1. the entry stored under that exact path,
        2. the catch-all entry stored under ``DEFAULT_RESPONSE_KEY``,
        3. an empty JSON object (``"{}"``) with status 200.
    """

    # Key in ``responses`` that holds the catch-all response for unknown paths.
    DEFAULT_RESPONSE_KEY = "_default"

    def __init__(self, responses: Optional[Dict[str, Any]] = None):
        """
        Initialize mock HTTP client.

        Args:
            responses: Dict mapping path to a response. A value is either the
                      response text, e.g. {"/weather": '{"temp": 72}'}, or a
                      dict with "text" and "status_code" keys (the shape that
                      ``add_response`` stores).
        """
        self.responses = responses or {}
        self.calls: List[tuple] = []  # Track all calls for assertions
        self.base_url = None

    def add_response(self, path: str, response: str, status_code: int = 200):
        """Add a mock response (text plus status code) for a specific path."""
        self.responses[path] = {"text": response, "status_code": status_code}
        logger.debug(f"Added mock response for {path}")

    def _respond(self, method: str, path: str, kwargs: Dict[str, Any]) -> MockResponse:
        """Record one request and build the response configured for ``path``."""
        self.calls.append((method, path, kwargs))
        logger.debug(f"Mock HTTP {method}: {path}")

        if path in self.responses:
            configured = self.responses[path]
        elif self.DEFAULT_RESPONSE_KEY in self.responses:
            # Catch-all response, used when no entry exists for this exact path.
            configured = self.responses[self.DEFAULT_RESPONSE_KEY]
        else:
            # Nothing configured at all: answer with an empty JSON object.
            return MockResponse(text="{}", status_code=200)

        if isinstance(configured, dict):
            return MockResponse(
                text=configured.get("text", ""),
                status_code=configured.get("status_code", 200),
            )
        # Plain value: treat it as the response text with the default status.
        return MockResponse(text=configured)

    async def get(self, path: str, **kwargs) -> MockResponse:
        """Mock GET request; ``kwargs`` are recorded but otherwise ignored."""
        return self._respond("GET", path, kwargs)

    async def post(self, path: str, **kwargs) -> MockResponse:
        """Mock POST request; ``kwargs`` are recorded but otherwise ignored."""
        return self._respond("POST", path, kwargs)

    async def aclose(self):
        """Mock close method (only logs)."""
        logger.debug("Mock HTTP client closed")

    def get_calls(self) -> List[tuple]:
        """Get all calls made to this client (the live list, not a copy)."""
        return self.calls

    def was_called(self, method: Optional[str] = None, path: Optional[str] = None) -> bool:
        """
        Check if a specific call was made.

        Args:
            method: HTTP method to match ("GET"/"POST"); a falsy value matches any.
            path: Request path to match; a falsy value matches any.

        Returns:
            True if at least one recorded call satisfies every given filter.
        """
        return any(
            (not method or method == call_method) and (not path or path == call_path)
            for call_method, call_path, _ in self.calls
        )
class MockDatabase:
    """
    In-memory stand-in for a database connection.

    No query is ever executed; the query text of each call is appended to
    ``queries`` so tests can inspect what was requested.
    """

    def __init__(self):
        # Record of every query string passed to execute()/fetch(), in order.
        self.queries: List[str] = []
        # Free-form in-memory storage; not touched by the methods of this class.
        self.data: Dict[str, Any] = {}

    async def execute(self, query: str, *args) -> Any:
        """Record ``query`` and return ``None``; positional args are ignored."""
        logger.debug(f"Mock DB execute: {query}")
        self.queries.append(query)
        return None

    async def fetch(self, query: str, *args) -> List[Dict]:
        """Record ``query`` and return an empty result list."""
        logger.debug(f"Mock DB fetch: {query}")
        self.queries.append(query)
        rows: List[Dict] = []
        return rows

    async def close(self):
        """Pretend to close the connection (only logs)."""
        logger.debug("Mock database closed")
class MockRedis:
    """
    In-memory stand-in for a Redis client.

    Values live in the plain ``store`` dictionary of the instance.
    """

    def __init__(self):
        # Backing key/value storage for this mock.
        self.store: Dict[str, Any] = {}

    async def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key``, or ``None`` when absent."""
        value = self.store.get(key)
        return value

    async def set(self, key: str, value: Any):
        """Store ``value`` under ``key``, replacing any previous value."""
        self.store[key] = value
        logger.debug(f"Mock Redis SET: {key}")

    async def delete(self, key: str):
        """Remove ``key`` if present; a missing key is silently ignored."""
        if key not in self.store:
            return
        del self.store[key]
        logger.debug(f"Mock Redis DEL: {key}")

    async def close(self):
        """Pretend to close the client (only logs)."""
        logger.debug("Mock Redis closed")
class MockDependencyFactory:
    """
    Builds mock dependencies in place of real ones.

    Both entry points are static coroutines, so no factory instance is needed.
    """

    @staticmethod
    async def create_mock(
        resource_type: str, config: Dict[str, Any], mock_responses: Optional[Dict] = None
    ) -> Any:
        """
        Build a single mock for the given resource type.

        Args:
            resource_type: One of "http_client", "postgres" or "redis"
            config: Dependency configuration; only "base_url" is read, and only
                    for HTTP clients
            mock_responses: Optional responses handed to the HTTP client mock

        Returns:
            The newly created mock instance

        Raises:
            ValueError: If ``resource_type`` is not one of the supported types
        """
        if resource_type == "postgres":
            return MockDatabase()
        if resource_type == "redis":
            return MockRedis()
        if resource_type != "http_client":
            raise ValueError(f"Unknown resource type: {resource_type}")

        client = MockHTTPClient(mock_responses)
        client.base_url = config.get("base_url")
        return client

    @staticmethod
    async def create_all_mocks(
        dependencies_config: Dict[str, Dict[str, Any]],
        mock_responses: Optional[Dict[str, Dict]] = None,
    ) -> Dict[str, Any]:
        """
        Build one mock per configured dependency.

        Args:
            dependencies_config: Dict mapping dependency name to its config;
                                 the config's "type" entry selects the mock
            mock_responses: Optional dict mapping dependency name to the
                            responses passed on to ``create_mock``

        Returns:
            Dict mapping dependency name to the created mock
        """
        # A missing/empty mapping means no dependency has configured responses.
        responses_by_name = mock_responses or {}
        created = {}
        for name, settings in dependencies_config.items():
            kind = settings.get("type")
            created[name] = await MockDependencyFactory.create_mock(
                kind, settings, responses_by_name.get(name)
            )
            logger.info(f"Created mock dependency '{name}' of type '{kind}'")
        return created

tactus/testing/mock_hitl.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""
+Mock HITL handler for BDD testing.
+Provides automatic responses for human interactions during tests,
+allowing tests to run without human intervention.
+"""
+import logging
+from datetime import datetime
+from typing import Any, Dict, Optional
+from tactus.protocols.models import HITLRequest, HITLResponse
+logger = logging.getLogger(__name__)
class MockHITLHandler:
    """
    HITL handler that provides automatic responses for tests.

    Useful for:
    - Running tests without human intervention
    - Deterministic test behavior
    - Fast test execution

    Responses are resolved in this order (see ``_get_response``):
        1. a configured entry whose key equals the first 50 characters of the
           request message,
        2. the longest configured message prefix the request message starts with,
        3. a configured per-type entry (key ``"_type_<request_type>"``),
        4. the built-in default for the request type.
    """

    # Prefix of the ``default_responses`` keys that hold per-type overrides.
    _TYPE_KEY_PREFIX = "_type_"
    # Number of leading message characters used for the exact-key lookup.
    _MESSAGE_KEY_LENGTH = 50

    def __init__(self, default_responses: Optional[Dict[str, Any]] = None):
        """
        Initialize mock HITL handler.

        Args:
            default_responses: Dict of lookup key -> response value, where a key
                              is either a message (prefix) or "_type_<type>".
                              If not provided, uses sensible defaults
        """
        self.default_responses = default_responses or {}
        self.requests_received: list[HITLRequest] = []

    def request_interaction(self, procedure_id: str, request: HITLRequest) -> HITLResponse:
        """
        Handle HITL request with automatic response.

        Args:
            procedure_id: Unique procedure identifier (not used by the mock)
            request: HITLRequest with interaction details

        Returns:
            HITLResponse with automatic answer
        """
        # Record the request
        self.requests_received.append(request)
        logger.debug(
            f"Mock HITL request: type={request.request_type}, message={request.message[:50]}..."
        )
        value = self._get_response(
            request, default=self._builtin_default(request.request_type)
        )
        logger.info(f"Mock HITL response: {value}")
        return HITLResponse(
            value=value,
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 and
            # yields a naive datetime. Kept as-is because the consumers of
            # ``responded_at`` are not visible here - confirm before switching
            # to a timezone-aware value.
            responded_at=datetime.utcnow(),
            timed_out=False,
        )

    @staticmethod
    def _builtin_default(request_type: str) -> Any:
        """Return a fresh built-in answer for ``request_type`` (None if unknown)."""
        if request_type == "approval":
            return True
        if request_type == "input":
            return "test input"
        if request_type == "review":
            return {"decision": "Approve"}
        if request_type == "escalation":
            return {"escalated": True}
        # "notification" and any unrecognised type have no meaningful answer.
        return None

    def _get_response(self, request: HITLRequest, default: Any) -> Any:
        """
        Get response for request, checking custom responses first.

        Args:
            request: The HITL request
            default: Default value if no custom response

        Returns:
            Response value
        """
        message = request.message

        # Exact lookup on the first 50 characters of the message.
        exact_key = message[: self._MESSAGE_KEY_LENGTH]
        if exact_key in self.default_responses:
            return self.default_responses[exact_key]

        # Prefix lookup: a key registered through configure_message_response()
        # matches any message that starts with it. Per-type keys and the empty
        # string (which would match everything) are excluded; the longest
        # matching prefix wins so that the most specific entry is used.
        matching_prefixes = [
            key
            for key in self.default_responses
            if isinstance(key, str)
            and key
            and not key.startswith(self._TYPE_KEY_PREFIX)
            and message.startswith(key)
        ]
        if matching_prefixes:
            return self.default_responses[max(matching_prefixes, key=len)]

        # Check for type-based default
        type_key = f"_type_{request.request_type}"
        if type_key in self.default_responses:
            return self.default_responses[type_key]

        # Use default
        return default

    def check_pending_response(self, procedure_id: str, message_id: str) -> Optional[HITLResponse]:
        """
        Check for pending response (not used in tests).

        Args:
            procedure_id: Unique procedure identifier
            message_id: Message/request ID to check

        Returns:
            None (tests don't have pending responses)
        """
        return None

    def cancel_pending_request(self, procedure_id: str, message_id: str) -> None:
        """
        Cancel pending request (not used in tests).

        Args:
            procedure_id: Unique procedure identifier
            message_id: Message/request ID to cancel
        """
        pass

    def get_requests_received(self) -> list[HITLRequest]:
        """
        Get all HITL requests received during test.

        Returns:
            List of HITLRequest objects (the live list, not a copy)
        """
        return self.requests_received

    def clear_history(self) -> None:
        """Clear request history (configured responses are kept)."""
        self.requests_received.clear()

    def configure_response(self, interaction_type: str, value: Any) -> None:
        """
        Configure mock response for a specific interaction type.

        This allows dynamic configuration during test scenarios.

        Args:
            interaction_type: Type of interaction (approval, input, review, etc.)
            value: The value to return for this interaction type

        Example:
            mock_hitl.configure_response("approval", True)
            mock_hitl.configure_response("input", "test data")
        """
        type_key = f"_type_{interaction_type}"
        self.default_responses[type_key] = value
        logger.debug(f"Configured mock HITL response: {interaction_type} -> {value}")

    def configure_message_response(self, message_prefix: str, value: Any) -> None:
        """
        Configure mock response for a specific message.

        Args:
            message_prefix: Prefix of the message to match
            value: The value to return when this message is received

        Example:
            mock_hitl.configure_message_response("Approve payment", False)
        """
        self.default_responses[message_prefix] = value
        logger.debug(f"Configured mock HITL response for message: {message_prefix}")

tactus/testing/mock_registry.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""
+Unified mock registry for managing all mocks (dependencies + HITL).
+This provides a central place to configure mocks for both dependencies
+and HITL interactions, usable in BDD tests and evaluations.
+"""
+import logging
+from typing import Dict, Any, Optional
+from tactus.testing.mock_dependencies import MockHTTPClient, MockDatabase, MockRedis
+from tactus.testing.mock_hitl import MockHITLHandler
+logger = logging.getLogger(__name__)
class UnifiedMockRegistry:
    """
    Single place that owns every mock used by a test run.

    Dependency mocks are kept per kind (HTTP, database, Redis) and keyed by
    dependency name; HITL interactions are delegated to one MockHITLHandler.
    Configuration can come from Gherkin steps or from plain Python code.
    """

    def __init__(self, hitl_handler: Optional[MockHITLHandler] = None):
        """
        Set up empty mock pools and the HITL handler.

        Args:
            hitl_handler: Existing HITL handler to reuse; a new
                          MockHITLHandler is created when omitted
        """
        # Handler answering human-in-the-loop requests
        self.hitl_mock: MockHITLHandler = hitl_handler or MockHITLHandler()
        # Per-kind pools, each mapping dependency name -> mock instance
        self.http_mocks: Dict[str, MockHTTPClient] = {}
        self.db_mocks: Dict[str, MockDatabase] = {}
        self.redis_mocks: Dict[str, MockRedis] = {}
        # Every mock handed out by create_mock_dependencies(), by name
        self.all_mocks: Dict[str, Any] = {}

    @staticmethod
    def _get_or_create(pool: Dict[str, Any], dep_name: str, factory) -> Any:
        """Return ``pool[dep_name]``, creating it with ``factory()`` if absent."""
        if dep_name not in pool:
            pool[dep_name] = factory()
        return pool[dep_name]

    def configure_http_response(
        self, dep_name: str, path: Optional[str], response: str, status_code: int = 200
    ) -> None:
        """
        Register a canned HTTP response for a dependency.

        The HTTP mock for ``dep_name`` is created on first use.

        Args:
            dep_name: Name of the HTTP dependency
            path: URL path; when falsy the response is stored under the
                  "_default" key of the client's ``responses`` mapping
            response: Response text (usually JSON string)
            status_code: HTTP status code

        Example:
            registry.configure_http_response("weather_api", "/data", '{"temp": 72}')
        """
        client = self._get_or_create(self.http_mocks, dep_name, MockHTTPClient)
        if not path:
            # No path given: store the entry under the reserved default key
            client.responses["_default"] = {
                "text": response,
                "status_code": status_code,
            }
        else:
            client.add_response(path, response, status_code)
        logger.debug(f"Configured mock HTTP response for {dep_name}: {path} -> {response[:50]}...")

    def configure_hitl_response(self, interaction_type: str, value: Any) -> None:
        """
        Set the HITL answer for an interaction type (forwards to the handler).

        Args:
            interaction_type: Type of interaction (approval, input, review)
            value: The value to return

        Example:
            registry.configure_hitl_response("approval", True)
        """
        handler = self.hitl_mock
        handler.configure_response(interaction_type, value)

    def configure_hitl_message_response(self, message_prefix: str, value: Any) -> None:
        """
        Set the HITL answer for a specific message (forwards to the handler).

        Args:
            message_prefix: Prefix of the message to match
            value: The value to return

        Example:
            registry.configure_hitl_message_response("Approve payment", False)
        """
        handler = self.hitl_mock
        handler.configure_message_response(message_prefix, value)

    async def create_mock_dependencies(
        self, dependencies_config: Dict[str, Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Build the mock for every declared dependency.

        A mock that was already configured under the same name is reused, so
        responses registered beforehand stay in effect. Dependencies with an
        unrecognised "type" are logged as a warning and left out of the result.

        Args:
            dependencies_config: Dict mapping dependency name to config

        Returns:
            Dict mapping dependency name to mock resource
        """
        created = {}
        for dep_name, dep_config in dependencies_config.items():
            resource_type = dep_config.get("type")

            if resource_type == "http_client":
                mock = self._get_or_create(self.http_mocks, dep_name, MockHTTPClient)
                mock.base_url = dep_config.get("base_url")
            elif resource_type == "postgres":
                mock = self._get_or_create(self.db_mocks, dep_name, MockDatabase)
            elif resource_type == "redis":
                mock = self._get_or_create(self.redis_mocks, dep_name, MockRedis)
            else:
                logger.warning(
                    f"Unknown resource type '{resource_type}' for dependency '{dep_name}'"
                )
                continue

            created[dep_name] = mock
            self.all_mocks[dep_name] = mock
            logger.info(f"Created mock dependency '{dep_name}' of type '{resource_type}'")
        return created

    def get_hitl_handler(self) -> MockHITLHandler:
        """Return the HITL mock handler owned by this registry."""
        return self.hitl_mock

    def clear_all(self) -> None:
        """
        Drop every dependency mock and the HITL request history.

        Only ``clear_history()`` is invoked on the HITL handler; nothing else
        on the handler is reset here.
        """
        for pool in (self.http_mocks, self.db_mocks, self.redis_mocks):
            pool.clear()
        self.hitl_mock.clear_history()
        self.all_mocks.clear()
        logger.debug("Cleared all mocks")

    def get_mock(self, dep_name: str) -> Optional[Any]:
        """Return the mock created for ``dep_name``, or None if there is none."""
        return self.all_mocks.get(dep_name)

tactus/testing/mock_tools.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""
+Mock tool system for deterministic BDD testing.
+Provides mocked tool responses for fast, repeatable tests
+without requiring actual LLM calls or external services.
+"""
+import logging
+from typing import Any, Dict
+from tactus.primitives.tool import ToolPrimitive, ToolCall
+logger = logging.getLogger(__name__)
class MockToolRegistry:
    """
    Lookup table of canned tool responses.

    Each tool name maps either to a fixed value or to a callable that computes
    the response from the call arguments.
    """

    def __init__(self):
        # tool name -> static response or callable(args) -> response
        self.mocks: Dict[str, Any] = {}

    def register(self, tool_name: str, response: Any) -> None:
        """
        Store the mock response for a tool, replacing any earlier one.

        Args:
            tool_name: Name of the tool to mock
            response: Static value, or a callable taking the args dict
        """
        self.mocks[tool_name] = response
        logger.debug(f"Registered mock for tool: {tool_name}")

    def get_response(self, tool_name: str, args: Dict) -> Any:
        """
        Resolve the mock response for one tool call.

        Args:
            tool_name: Name of the tool
            args: Arguments passed to the tool; forwarded to callable mocks

        Returns:
            The registered value, or the result of calling it with ``args``
            when the registered value is callable

        Raises:
            ValueError: If no mock registered for tool
        """
        if not self.has_mock(tool_name):
            raise ValueError(f"No mock registered for tool: {tool_name}")

        registered = self.mocks[tool_name]
        # Callable mocks produce their answer dynamically from the arguments.
        return registered(args) if callable(registered) else registered

    def has_mock(self, tool_name: str) -> bool:
        """Tell whether a mock is registered under ``tool_name``."""
        return tool_name in self.mocks

    def clear(self) -> None:
        """Forget every registered mock."""
        self.mocks.clear()
class MockedToolPrimitive(ToolPrimitive):
    """
    ToolPrimitive variant that answers tool calls from a MockToolRegistry.

    Handy when tests must be fast and deterministic, run without API keys,
    or must not reach external services.
    """

    def __init__(self, mock_registry: MockToolRegistry):
        super().__init__()
        # Source of the canned responses returned by record_call().
        self.mock_registry = mock_registry

    def record_call(
        self, tool_name: str, args: Dict[str, Any], result: Any = None, agent_name: str = None
    ) -> Any:
        """
        Record a tool call and answer it with a mocked result.

        Args:
            tool_name: Name of the tool
            args: Tool arguments
            result: Accepted for signature compatibility; never used, the
                    mocked value always wins
            agent_name: Accepted for signature compatibility; not used

        Returns:
            The registry's response for the tool, or a generic
            {"status": "ok", "tool": <tool_name>} dict when none is registered
        """
        registry = self.mock_registry
        if not registry.has_mock(tool_name):
            # Unmocked tools still get an answer, so agent mocks may call
            # tools that nobody registered explicitly.
            outcome = {"status": "ok", "tool": tool_name}
            logger.debug(f"No mock registered for {tool_name}, using default response")
        else:
            outcome = registry.get_response(tool_name, args)

        # Bookkeeping on the call-tracking attributes; presumably these are
        # initialised by ToolPrimitive.__init__ - verify against the base class.
        recorded = ToolCall(tool_name, args, outcome)
        self._tool_calls.append(recorded)
        self._last_calls[tool_name] = recorded

        logger.info(f"Mocked tool call: {tool_name}(args={args}) -> {outcome}")
        return outcome
def create_default_mocks() -> Dict[str, Any]:
    """
    Build the stock mock responses for a handful of common tools.

    A new dictionary is assembled on every call.

    Returns:
        Dict of tool_name -> mock_response
    """
    defaults: Dict[str, Any] = {}
    defaults["done"] = {"status": "complete", "message": "Task completed"}
    defaults["search"] = {"results": ["result1", "result2", "result3"]}
    defaults["write_file"] = {"success": True, "path": "/tmp/test.txt"}
    defaults["read_file"] = {"content": "test content"}
    return defaults