PyPI - tactus - Versions diffs - 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl - Mend

tactus 0.30.0-py3-none-any.whl → 0.31.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

tactus/__init__.py +1 -1
tactus/adapters/lua_tools.py +23 -1
tactus/adapters/mcp_manager.py +62 -35
tactus/broker/server.py +314 -0
tactus/cli/app.py +11 -1
tactus/core/dsl_stubs.py +138 -41
tactus/core/output_validator.py +69 -15
tactus/core/registry.py +13 -25
tactus/core/runtime.py +208 -69
tactus/dspy/agent.py +87 -30
tactus/ide/server.py +0 -10
tactus/primitives/__init__.py +0 -2
tactus/primitives/handles.py +8 -3
tactus/primitives/procedure_callable.py +36 -0
tactus/protocols/config.py +0 -5
tactus/protocols/result.py +3 -3
tactus/stdlib/tac/tactus/tools/done.tac +1 -1
tactus/stdlib/tac/tactus/tools/log.tac +1 -1
tactus/testing/README.md +1 -12
tactus/testing/behave_integration.py +12 -2
tactus/testing/context.py +156 -46
tactus/testing/mock_agent.py +43 -8
tactus/testing/steps/builtin.py +264 -54
tactus/testing/test_runner.py +6 -0
tactus/validation/semantic_visitor.py +19 -11
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/METADATA +9 -11
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/RECORD +30 -31
tactus/primitives/stage.py +0 -202
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/WHEEL +0 -0
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/entry_points.txt +0 -0
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/licenses/LICENSE +0 -0

tactus/primitives/__init__.py CHANGED Viewed

@@ -10,7 +10,6 @@ from tactus.primitives.control import IterationsPrimitive, StopPrimitive
 from tactus.primitives.tool import ToolPrimitive
 from tactus.primitives.log import LogPrimitive
 from tactus.primitives.step import StepPrimitive, CheckpointPrimitive
-from tactus.primitives.stage import StagePrimitive
 from tactus.primitives.json import JsonPrimitive
 from tactus.primitives.retry import RetryPrimitive
 from tactus.primitives.file import FilePrimitive
@@ -40,7 +39,6 @@ __all__ = [
     "StepPrimitive",
     "CheckpointPrimitive",
     "MessageHistoryPrimitive",
-    "StagePrimitive",
     "JsonPrimitive",
     "RetryPrimitive",
     "FilePrimitive",

tactus/primitives/handles.py CHANGED Viewed

@@ -87,7 +87,7 @@ class AgentHandle:
         self._execution_context: Optional[Any] = None
         logger.debug(f"AgentHandle created for '{name}'")
-    def __call__(self, inputs=None):
+    def __call__(self, inputs=None):
         """
         Execute an agent turn using the callable interface.
@@ -117,8 +117,13 @@ class AgentHandle:
                 f"This should not happen with immediate agent creation.\n"
                 f"Please report this as a bug with a minimal reproduction example."
             )
-        # Convert Lua table to Python dict if needed
-        converted_inputs = _convert_lua_table(inputs) if inputs is not None else None
+        # Convert Lua table to Python dict if needed
+        converted_inputs = _convert_lua_table(inputs) if inputs is not None else None
+        # Convenience: allow shorthand string calls in Lua:
+        #   World("Hello") == World({message = "Hello"})
+        if isinstance(converted_inputs, str):
+            converted_inputs = {"message": converted_inputs}
         # If we have an execution context, checkpoint the agent call
         logger.debug(

tactus/primitives/procedure_callable.py CHANGED Viewed

@@ -258,6 +258,42 @@ class ProcedureCallable:
         Raises:
             ValueError: If output is not a dict or missing required fields
         """
+        # If no output schema is declared, accept any return value.
+        if not self.output_schema:
+            return
+        # Scalar output schema support:
+        #   output = field.string{...}
+        if (
+            isinstance(self.output_schema, dict)
+            and "type" in self.output_schema
+            and isinstance(self.output_schema.get("type"), str)
+        ):
+            expected_type = self.output_schema.get("type")
+            if expected_type not in {"string", "number", "boolean", "object", "array"}:
+                # Not a scalar schema; treat as normal object schema.
+                expected_type = None
+        else:
+            expected_type = None
+        if expected_type is not None:
+            is_required = bool(self.output_schema.get("required", False))
+            if result is None and not is_required:
+                return
+            if expected_type == "string" and not isinstance(result, str):
+                raise ValueError(f"Procedure '{self.name}' must return string, got {type(result)}")
+            if expected_type == "number" and not isinstance(result, (int, float)):
+                raise ValueError(f"Procedure '{self.name}' must return number, got {type(result)}")
+            if expected_type == "boolean" and not isinstance(result, bool):
+                raise ValueError(f"Procedure '{self.name}' must return boolean, got {type(result)}")
+            if expected_type == "object" and not isinstance(result, dict):
+                raise ValueError(f"Procedure '{self.name}' must return object, got {type(result)}")
+            if expected_type == "array" and not isinstance(result, list):
+                raise ValueError(f"Procedure '{self.name}' must return array, got {type(result)}")
+            return
         if not isinstance(result, dict):
             raise ValueError(f"Procedure '{self.name}' must return dict, got {type(result)}")

tactus/protocols/config.py CHANGED Viewed

@@ -89,11 +89,6 @@ class ProcedureConfig(BaseModel):
     # HITL declarations
     hitl: Dict[str, Any] = Field(default_factory=dict, description="Pre-defined HITL interactions")
-    # Stages (optional)
-    stages: List[str] = Field(
-        default_factory=list, description="Optional stage names for workflow progression"
-    )
     # Sub-procedures (future)
     procedures: Dict[str, Any] = Field(
         default_factory=dict, description="Inline sub-procedure definitions (future feature)"

tactus/protocols/result.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 Result object returned by cost-incurring primitives (e.g., Agents).
-Standardizes on `result.value` for the returned data (string or structured).
+Standardizes on `result.output` for the returned data (string or structured).
 """
 from __future__ import annotations
@@ -17,12 +17,12 @@ class TactusResult(BaseModel):
     """
     Standard Result wrapper for Lua and Python consumption.
-    - `value`: The returned data (string or structured dict/list/etc.)
+    - `output`: The returned data (string or structured dict/list/etc.)
     - `usage`: Token usage stats for the call that produced this result
     - `cost_stats`: Cost stats for the call that produced this result
     """
-    value: Any = Field(..., description="Result value (string or structured data)")
+    output: Any = Field(..., description="Result output (string or structured data)")
     usage: UsageStats = Field(default_factory=UsageStats)
     cost_stats: CostStats = Field(default_factory=CostStats)

tactus/stdlib/tac/tactus/tools/done.tac CHANGED Viewed

@@ -6,7 +6,7 @@ Usage:
     -- In an agent's toolset
     agent = Agent {
-        toolsets = {"done"},
+        tools = {"done"},
         ...
     }

tactus/stdlib/tac/tactus/tools/log.tac CHANGED Viewed

@@ -6,7 +6,7 @@ Usage:
     -- In an agent's toolset
     agent = Agent {
-        toolsets = {"log"},
+        tools = {"log"},
         ...
     }

tactus/testing/README.md CHANGED Viewed

@@ -7,7 +7,7 @@ First-class Gherkin-style BDD testing integrated into the Tactus DSL.
 The Tactus BDD Testing Framework allows you to write behavior-driven tests directly in your procedure files using Gherkin syntax. Tests are executed using Behave under the hood, with full support for:
 - **Natural language specifications** - Write tests in plain English using Gherkin
-- **Built-in step library** - Comprehensive steps for Tactus primitives (tools, stages, state, etc.)
+- **Built-in step library** - Comprehensive steps for Tactus primitives (tools, state, etc.)
 - **Custom steps** - Define your own steps in Lua for advanced assertions
 - **Parallel execution** - Run scenarios in parallel for fast feedback
 - **Consistency evaluation** - Run tests multiple times to measure reliability
@@ -88,15 +88,6 @@ Then the search tool should be called exactly 2 times
 Then the search tool should be called with query=test
 ```
-### Stage Steps
-```gherkin
-Given the procedure has started
-Then the stage should be processing
-Then the stage should transition from planning to executing
-Given we are in stage complete
-```
 ### State Steps
 ```gherkin
@@ -280,5 +271,3 @@ See `examples/with-bdd-tests.tac` for a complete example with:

tactus/testing/behave_integration.py CHANGED Viewed

@@ -8,7 +8,7 @@ from parsed Gherkin and registered steps.
 import logging
 import tempfile
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, List, Optional
 from .models import ParsedFeature, ParsedScenario
 from .steps.registry import StepRegistry
@@ -222,6 +222,8 @@ class BehaveEnvironmentGenerator:
         procedure_file: Path,
         mock_tools: Optional[Dict] = None,
         params: Optional[Dict] = None,
+        mcp_servers: Optional[Dict] = None,
+        tool_paths: Optional[List[str]] = None,
         mocked: bool = False,
     ) -> Path:
         """
@@ -244,6 +246,8 @@ class BehaveEnvironmentGenerator:
         mock_tools_json = json.dumps(mock_tools or {}).replace("'", "\\'")
         params_json = json.dumps(params or {}).replace("'", "\\'")
+        mcp_servers_json = json.dumps(mcp_servers or {}).replace("'", "\\'")
+        tool_paths_json = json.dumps(tool_paths or []).replace("'", "\\'")
         # Convert procedure_file to absolute path so it works from temp behave directory
         absolute_procedure_file = Path(procedure_file).resolve()
@@ -278,6 +282,8 @@ class BehaveEnvironmentGenerator:
             f.write(f"    context.procedure_file = Path(r'{absolute_procedure_file}')\n")
             f.write(f"    context.mock_tools = json.loads('{mock_tools_json}')\n")
             f.write(f"    context.params = json.loads('{params_json}')\n")
+            f.write(f"    context.mcp_servers = json.loads('{mcp_servers_json}')\n")
+            f.write(f"    context.tool_paths = json.loads('{tool_paths_json}')\n")
             f.write(f"    context.mocked = {mocked}\n\n")
             f.write("def before_scenario(context, scenario):\n")
@@ -291,6 +297,8 @@ class BehaveEnvironmentGenerator:
             f.write("        procedure_file=context.procedure_file,\n")
             f.write("        params=context.params,\n")
             f.write("        mock_tools=context.mock_tools,\n")
+            f.write("        mcp_servers=context.mcp_servers,\n")
+            f.write("        tool_paths=context.tool_paths,\n")
             f.write("        mocked=context.mocked,\n")
             f.write("    )\n")
             f.write("    \n")
@@ -330,6 +338,8 @@ def setup_behave_directory(
     work_dir: Optional[Path] = None,
     mock_tools: Optional[Dict] = None,
     params: Optional[Dict] = None,
+    mcp_servers: Optional[Dict] = None,
+    tool_paths: Optional[List[str]] = None,
     mocked: bool = False,
 ) -> Path:
     """
@@ -364,7 +374,7 @@ def setup_behave_directory(
     # Generate environment.py with mock tools, params, and mocked flag
     env_gen = BehaveEnvironmentGenerator()
-    env_gen.generate(work_dir, procedure_file, mock_tools, params, mocked)
+    env_gen.generate(work_dir, procedure_file, mock_tools, params, mcp_servers, tool_paths, mocked)
     logger.info(f"Behave directory setup complete: {work_dir}")
     return work_dir

tactus/testing/context.py CHANGED Viewed

@@ -27,11 +27,15 @@ class TactusTestContext:
         procedure_file: Path,
         params: Optional[Dict] = None,
         mock_tools: Optional[Dict] = None,
+        mcp_servers: Optional[Dict] = None,
+        tool_paths: Optional[List[str]] = None,
         mocked: bool = False,
     ):
         self.procedure_file = procedure_file
         self.params = params or {}
         self.mock_tools = mock_tools  # tool_name -> mock_response
+        self.mcp_servers = mcp_servers or {}
+        self.tool_paths = tool_paths or []
         self.mocked = mocked  # Whether to use mocked dependencies
         self.mock_registry = None  # Unified mock registry for dependencies + HITL
         self.runtime = None
@@ -41,6 +45,123 @@ class TactusTestContext:
         self.total_cost: float = 0.0  # Track total cost
         self.total_tokens: int = 0  # Track total tokens
         self.cost_breakdown: List[Any] = []  # Track per-call costs
+        self._agent_mock_turns: Dict[str, List[Dict[str, Any]]] = {}
+        self._scenario_message: str | None = None
+    def set_scenario_message(self, message: str) -> None:
+        """Set the scenario's primary injected message (for in-spec mocking coordination)."""
+        self._scenario_message = message
+    def get_scenario_message(self) -> str | None:
+        """Get the scenario's primary injected message, if set."""
+        return self._scenario_message
+    def mock_agent_response(
+        self, agent: str, message: str, when_message: str | None = None
+    ) -> None:
+        """Add a mocked agent response for this scenario (temporal; 1 per agent turn).
+        If `when_message` is provided, the mock is selected when the agent is called
+        with that exact injected message.
+        """
+        turn: Dict[str, Any] = {"message": message}
+        effective_when = when_message if when_message is not None else self._scenario_message
+        if effective_when is not None:
+            turn["when_message"] = effective_when
+        self._agent_mock_turns.setdefault(agent, []).append(turn)
+        # Ensure runtime exists and sees the same dict reference for this scenario.
+        if self.runtime is None:
+            self.setup_runtime()
+        if self.runtime is not None:
+            self.runtime.external_agent_mocks = self._agent_mock_turns
+    def mock_agent_tool_call(
+        self,
+        agent: str,
+        tool: str,
+        args: Dict[str, Any] | None = None,
+        when_message: str | None = None,
+    ) -> None:
+        """Add a mocked tool call to an agent's next mocked turn for this scenario."""
+        args = args or {}
+        effective_when = when_message if when_message is not None else self._scenario_message
+        if (
+            agent in self._agent_mock_turns
+            and self._agent_mock_turns[agent]
+            and (
+                effective_when is None
+                or self._agent_mock_turns[agent][-1].get("when_message") == effective_when
+            )
+        ):
+            turn = self._agent_mock_turns[agent][-1]
+        else:
+            turn = {}
+            if effective_when is not None:
+                turn["when_message"] = effective_when
+            self._agent_mock_turns.setdefault(agent, []).append(turn)
+        tool_calls = turn.get("tool_calls")
+        if not isinstance(tool_calls, list):
+            tool_calls = []
+            turn["tool_calls"] = tool_calls
+        tool_calls.append({"tool": tool, "args": args})
+        if self.runtime is None:
+            self.setup_runtime()
+        if self.runtime is not None:
+            self.runtime.external_agent_mocks = self._agent_mock_turns
+    def mock_agent_data(
+        self, agent: str, data: Dict[str, Any], when_message: str | None = None
+    ) -> None:
+        """Set structured output mock data for an agent's next mocked turn.
+        This is only used when an agent has an output schema; the DSPy agent mock
+        logic will apply `data` as the structured `result.output`.
+        """
+        if not isinstance(data, dict):
+            raise TypeError("mock_agent_data expects a dict")
+        effective_when = when_message if when_message is not None else self._scenario_message
+        if (
+            agent in self._agent_mock_turns
+            and self._agent_mock_turns[agent]
+            and (
+                effective_when is None
+                or self._agent_mock_turns[agent][-1].get("when_message") == effective_when
+            )
+        ):
+            turn = self._agent_mock_turns[agent][-1]
+        else:
+            turn = {}
+            if effective_when is not None:
+                turn["when_message"] = effective_when
+            self._agent_mock_turns.setdefault(agent, []).append(turn)
+        turn["data"] = data
+        if self.runtime is None:
+            self.setup_runtime()
+        if self.runtime is not None:
+            self.runtime.external_agent_mocks = self._agent_mock_turns
+    def mock_tool_returns(self, tool: str, output: Any) -> None:
+        """Configure a runtime tool mock (Mocks { tool = { returns = ... } } equivalent)."""
+        if self.runtime is None:
+            self.setup_runtime()
+        if self.runtime is None:
+            raise AssertionError("Runtime not initialized")
+        if self.runtime.mock_manager is None:
+            from tactus.core.mocking import MockManager
+            self.runtime.mock_manager = MockManager()
+        self.runtime.mock_manager.register_mock(tool, {"output": output})
+        self.runtime.mock_manager.enable_mock(tool)
     def setup_runtime(self) -> None:
         """Initialize TactusRuntime with storage and handlers."""
@@ -80,6 +201,8 @@ class TactusTestContext:
             openai_api_key=os.environ.get("OPENAI_API_KEY"),  # Pass API key for real LLM calls
             log_handler=log_handler,  # Enable cost tracking
             source_file_path=str(self.procedure_file.resolve()),  # For require() path resolution
+            mcp_servers=self.mcp_servers,
+            tool_paths=self.tool_paths,
         )
         # Create MockManager for handling Mocks {} blocks when in mocked mode
@@ -88,6 +211,8 @@ class TactusTestContext:
             self.runtime.mock_manager = MockManager()
             logger.info("Created MockManager for Mocks {} block support")
+            # Mocked-mode tests should never call real LLMs by default.
+            self.runtime.mock_all_agents = True
         logger.debug(f"Setup runtime for test: {self.procedure_file.stem}")
@@ -184,12 +309,6 @@ class TactusTestContext:
         except Exception as e:
             logger.debug(f"Could not capture Tool primitive: {e}")
-        # Capture Stage primitive
-        try:
-            self._primitives["stage"] = self.runtime.stage_primitive
-        except Exception as e:
-            logger.debug(f"Could not capture Stage primitive: {e}")
         # Capture State primitive
         try:
             self._primitives["state"] = self.runtime.state_primitive
@@ -243,36 +362,6 @@ class TactusTestContext:
             ]
         return []
-    # Stage-related methods
-    def current_stage(self) -> Optional[str]:
-        """Get current stage."""
-        stage_prim = self._primitives.get("stage")
-        if stage_prim:
-            return stage_prim.current()
-        return None
-    def stage_history(self) -> List[str]:
-        """Get stage transition history as list of stage names."""
-        stage_prim = self._primitives.get("stage")
-        if stage_prim and hasattr(stage_prim, "_history"):
-            # Extract just the stage names from history
-            stages = []
-            for transition in stage_prim._history:
-                if transition.get("from_stage"):
-                    stages.append(transition["from_stage"])
-                if transition.get("to_stage"):
-                    stages.append(transition["to_stage"])
-            # Remove duplicates while preserving order
-            seen = set()
-            result = []
-            for stage in stages:
-                if stage not in seen:
-                    seen.add(stage)
-                    result.append(stage)
-            return result
-        return []
     # State-related methods
     def state_get(self, key: str) -> Any:
@@ -296,12 +385,16 @@ class TactusTestContext:
         if self.execution_result:
             # Check if outputs are in a dedicated field
             if "output" in self.execution_result:
-                return self.execution_result["output"].get(key)
+                output = self.execution_result["output"]
+                if isinstance(output, dict):
+                    return output.get(key)
+                return None
             # Otherwise check in the result dict (procedure return value)
-            if "result" in self.execution_result and isinstance(
-                self.execution_result["result"], dict
-            ):
-                return self.execution_result["result"].get(key)
+            if "result" in self.execution_result:
+                result = self.execution_result["result"]
+                if isinstance(result, dict):
+                    return result.get(key)
         return None
     def output_exists(self, key: str) -> bool:
@@ -309,14 +402,31 @@ class TactusTestContext:
         if self.execution_result:
             # Check if outputs are in a dedicated field
             if "output" in self.execution_result:
-                return key in self.execution_result["output"]
+                output = self.execution_result["output"]
+                return isinstance(output, dict) and key in output
             # Otherwise check in the result dict (procedure return value)
-            if "result" in self.execution_result and isinstance(
-                self.execution_result["result"], dict
-            ):
-                return key in self.execution_result["result"]
+            if "result" in self.execution_result:
+                result = self.execution_result["result"]
+                if isinstance(result, dict):
+                    return key in result
         return False
+    def output_value(self) -> Any:
+        """Get the full (possibly scalar) output value for the procedure."""
+        if not self.execution_result:
+            return None
+        if "output" in self.execution_result:
+            return self.execution_result["output"]
+        result = self.execution_result.get("result")
+        try:
+            from tactus.protocols.result import TactusResult
+            if isinstance(result, TactusResult):
+                return result.output
+        except Exception:
+            pass
+        return result
     # Completion methods
     def stop_success(self) -> bool:

tactus/testing/mock_agent.py CHANGED Viewed

@@ -145,16 +145,51 @@ class MockAgentPrimitive:
                 f"}}"
             )
+        temporal_turns = getattr(mock_config, "temporal", None) or []
+        if temporal_turns:
+            injected = opts.get("message")
+            selected_turn = None
+            if injected is not None:
+                for turn in temporal_turns:
+                    if isinstance(turn, dict) and turn.get("when_message") == injected:
+                        selected_turn = turn
+                        break
+            if selected_turn is None:
+                idx = self.turn_count - 1  # 1-indexed turns
+                if idx < 0:
+                    idx = 0
+                if idx >= len(temporal_turns):
+                    idx = len(temporal_turns) - 1
+                selected_turn = temporal_turns[idx]
+            turn = selected_turn
+            if isinstance(turn, dict):
+                message = turn.get("message", mock_config.message)
+                tool_calls = turn.get("tool_calls", mock_config.tool_calls)
+                data = turn.get("data", mock_config.data)
+                raw_usage = turn.get("usage", mock_config.usage)
+            else:
+                message = mock_config.message
+                tool_calls = mock_config.tool_calls
+                data = mock_config.data
+                raw_usage = mock_config.usage
+        else:
+            message = mock_config.message
+            tool_calls = mock_config.tool_calls
+            data = mock_config.data
+            raw_usage = mock_config.usage
         # Execute the configured tool calls
-        tool_calls_executed = self._execute_tool_calls(mock_config.tool_calls)
+        tool_calls_executed = self._execute_tool_calls(tool_calls)
         # Structured payload (optional) for result.data
-        data = getattr(mock_config, "data", None) or {}
+        data = data or {}
         if not data:
-            data = {"response": mock_config.message}
+            data = {"response": message}
         # Token usage payload (optional) for result.usage
-        raw_usage = getattr(mock_config, "usage", None) or {}
         usage = dict(raw_usage) if isinstance(raw_usage, dict) else {}
         prompt_tokens = int(usage.get("prompt_tokens", 0) or 0)
         completion_tokens = int(usage.get("completion_tokens", 0) or 0)
@@ -167,16 +202,16 @@ class MockAgentPrimitive:
         usage.setdefault("total_tokens", total_tokens)
         # Messages generated in this turn
-        user_message = opts.get("message") or opts.get("inject")
+        user_message = opts.get("message")
         new_messages = []
         if user_message:
             new_messages.append({"role": "user", "content": user_message})
-        if mock_config.message:
-            new_messages.append({"role": "assistant", "content": mock_config.message})
+        if message:
+            new_messages.append({"role": "assistant", "content": message})
         # Return the configured message
         return MockAgentResult(
-            message=mock_config.message,
+            message=message,
             tool_calls=tool_calls_executed,
             data=data,
             usage=usage,

tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl

tactus 0.30.0-py3-none-any.whl → 0.31.1-py3-none-any.whl