PyPI - tactus - Versions diffs - 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl - Mend

tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

tactus/__init__.py +1 -1
tactus/adapters/lua_tools.py +23 -1
tactus/adapters/mcp_manager.py +62 -35
tactus/broker/server.py +314 -0
tactus/cli/app.py +11 -1
tactus/core/dsl_stubs.py +138 -41
tactus/core/output_validator.py +69 -15
tactus/core/registry.py +13 -25
tactus/core/runtime.py +208 -69
tactus/dspy/agent.py +87 -30
tactus/ide/server.py +0 -10
tactus/primitives/__init__.py +0 -2
tactus/primitives/handles.py +8 -3
tactus/primitives/procedure_callable.py +36 -0
tactus/protocols/config.py +0 -5
tactus/protocols/result.py +3 -3
tactus/stdlib/tac/tactus/tools/done.tac +1 -1
tactus/stdlib/tac/tactus/tools/log.tac +1 -1
tactus/testing/README.md +1 -12
tactus/testing/behave_integration.py +12 -2
tactus/testing/context.py +156 -46
tactus/testing/mock_agent.py +43 -8
tactus/testing/steps/builtin.py +264 -54
tactus/testing/test_runner.py +6 -0
tactus/validation/semantic_visitor.py +19 -11
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/METADATA +9 -11
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/RECORD +30 -31
tactus/primitives/stage.py +0 -202
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/WHEEL +0 -0
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/entry_points.txt +0 -0
{tactus-0.30.0.dist-info → tactus-0.31.1.dist-info}/licenses/LICENSE +0 -0

tactus/core/runtime.py CHANGED Viewed

@@ -42,7 +42,6 @@ from tactus.primitives.human import HumanPrimitive
 from tactus.primitives.step import StepPrimitive, CheckpointPrimitive
 from tactus.primitives.log import LogPrimitive
 from tactus.primitives.message_history import MessageHistoryPrimitive
-from tactus.primitives.stage import StagePrimitive
 from tactus.primitives.json import JsonPrimitive
 from tactus.primitives.retry import RetryPrimitive
 from tactus.primitives.file import FilePrimitive
@@ -137,7 +136,6 @@ class TactusRuntime:
         self.step_primitive: Optional[StepPrimitive] = None
         self.checkpoint_primitive: Optional[CheckpointPrimitive] = None
         self.log_primitive: Optional[LogPrimitive] = None
-        self.stage_primitive: Optional[StagePrimitive] = None
         self.json_primitive: Optional[JsonPrimitive] = None
         self.retry_primitive: Optional[RetryPrimitive] = None
         self.file_primitive: Optional[FilePrimitive] = None
@@ -160,6 +158,8 @@ class TactusRuntime:
         # Mock manager for testing
         self.mock_manager: Optional[Any] = None  # MockManager instance
+        self.external_agent_mocks: Optional[dict[str, list[dict[str, Any]]]] = None
+        self.mock_all_agents: bool = False
         logger.info(f"TactusRuntime initialized for procedure {procedure_id}")
@@ -308,6 +308,10 @@ class TactusRuntime:
             if format == "lua":
                 logger.info("Step 1: Parsing Lua DSL configuration")
+                # Script mode: wrap top-level executable code in an implicit main Procedure
+                # so agents/tools aren't executed during parsing.
+                source = self._maybe_transform_script_mode_source(source)
                 # Pass placeholder_tool so tool() can return callable ToolHandles
                 self.registry = self._parse_declarations(source, placeholder_tool)
                 logger.info("Loaded procedure from Lua DSL")
@@ -325,6 +329,31 @@ class TactusRuntime:
                         self.mock_manager.enable_mock(tool_name)
                         logger.debug(f"Registered and enabled mock for tool '{tool_name}'")
+                # Apply external, per-scenario agent mocks (from BDD steps).
+                # These should take precedence over any `Mocks { ... }` declared in the .tac file.
+                if self.external_agent_mocks and self.registry:
+                    from tactus.core.registry import AgentMockConfig
+                    for agent_name, temporal_turns in self.external_agent_mocks.items():
+                        if not isinstance(temporal_turns, list):
+                            raise TactusRuntimeError(
+                                f"External agent mocks for '{agent_name}' must be a list of turns"
+                            )
+                        self.registry.agent_mocks[agent_name] = AgentMockConfig(
+                            temporal=temporal_turns
+                        )
+                # If we're in mocked mode, ensure agents are mocked deterministically even if
+                # the .tac file doesn't declare `Mocks { ... }` for them.
+                if self.mock_all_agents and self.registry:
+                    from tactus.core.registry import AgentMockConfig
+                    for agent_name in self.registry.agents.keys():
+                        if agent_name not in self.registry.agent_mocks:
+                            self.registry.agent_mocks[agent_name] = AgentMockConfig(
+                                message=f"Mocked response from {agent_name}"
+                            )
                 # Merge external config (from .tac.yml) into self.config
                 # External config provides toolsets, default_toolsets, etc.
                 if self.external_config:
@@ -398,10 +427,6 @@ class TactusRuntime:
             self.message_history_primitive = MessageHistoryPrimitive(
                 message_history_manager=self.message_history_manager
             )
-            declared_stages = self.config.get("stages", [])
-            self.stage_primitive = StagePrimitive(
-                declared_stages=declared_stages, lua_sandbox=self.lua_sandbox
-            )
             self.json_primitive = JsonPrimitive(lua_sandbox=self.lua_sandbox)
             self.retry_primitive = RetryPrimitive()
             self.file_primitive = FilePrimitive(execution_context=self.execution_context)
@@ -1419,11 +1444,13 @@ class TactusRuntime:
                 # Handle tools list (can be tool names or inline definitions)
                 tools_list = definition["tools"]
-                # Check if we have inline tool definitions (dicts with 'name' and 'handler')
+                # Check if we have inline tool definitions (dicts with a Lua handler)
                 has_inline_tools = False
                 if isinstance(tools_list, list):
                     for item in tools_list:
-                        if isinstance(item, dict) and "handler" in item:
+                        if isinstance(item, dict) and (
+                            "handler" in item or (1 in item and callable(item.get(1)))
+                        ):
                             has_inline_tools = True
                             break
@@ -1714,14 +1741,17 @@ class TactusRuntime:
                 f"Agent '{agent_name}' using provider '{provider_name}' with model '{model_id}'"
             )
-            # Handle inline Lua function tools
+            # Handle inline Lua function tools (agent.inline_tools)
             inline_tools_toolset = None
-            if "inline_tool_defs" in agent_config and agent_config["inline_tool_defs"]:
-                tools_spec = agent_config["inline_tool_defs"]
+            if "inline_tools" in agent_config and agent_config["inline_tools"]:
+                tools_spec = agent_config["inline_tools"]
                 # These are inline tool definitions (dicts with 'handler' key)
                 if isinstance(tools_spec, list):
                     inline_tool_specs = [
-                        t for t in tools_spec if isinstance(t, dict) and "handler" in t
+                        t
+                        for t in tools_spec
+                        if isinstance(t, dict)
+                        and ("handler" in t or (1 in t and callable(t.get(1))))
                     ]
                     if inline_tool_specs:
                         # These are inline Lua function tools
@@ -1742,38 +1772,36 @@ class TactusRuntime:
                                 f"Could not import LuaToolsAdapter for agent '{agent_name}': {e}"
                             )
-            # Get toolsets for this agent
+            # Get tools (tool/toolset references) for this agent
             # Use a sentinel value to distinguish "not present" from "present but None/empty"
             _MISSING = object()
-            agent_toolsets_config = agent_config.get("toolsets", _MISSING)
+            agent_tools_config = agent_config.get("tools", _MISSING)
             # Debug log
             logger.debug(
-                f"Agent '{agent_name}' raw toolsets config: {agent_toolsets_config}, type: {type(agent_toolsets_config)}"
+                f"Agent '{agent_name}' raw tools config: {agent_tools_config}, type: {type(agent_tools_config)}"
             )
             # Convert Lua table to Python list if needed
             if (
-                agent_toolsets_config is not _MISSING
-                and agent_toolsets_config is not None
-                and hasattr(agent_toolsets_config, "__len__")
+                agent_tools_config is not _MISSING
+                and agent_tools_config is not None
+                and hasattr(agent_tools_config, "__len__")
             ):
                 try:
                     # Try to convert Lua table to list
-                    agent_toolsets_config = (
-                        list(agent_toolsets_config.values())
-                        if hasattr(agent_toolsets_config, "values")
-                        else list(agent_toolsets_config)
-                    )
-                    logger.debug(
-                        f"Agent '{agent_name}' converted toolsets to: {agent_toolsets_config}"
+                    agent_tools_config = (
+                        list(agent_tools_config.values())
+                        if hasattr(agent_tools_config, "values")
+                        else list(agent_tools_config)
                     )
+                    logger.debug(f"Agent '{agent_name}' converted tools to: {agent_tools_config}")
                 except (TypeError, AttributeError):
                     # If conversion fails, leave as-is
                     pass
-            if agent_toolsets_config is _MISSING:
-                # No toolsets key present - use default toolsets if configured, otherwise all
+            if agent_tools_config is _MISSING:
+                # No tools key present - use default toolsets if configured, otherwise all
                 if default_toolset_names:
                     filtered_toolsets = self._parse_toolset_expressions(default_toolset_names)
                     logger.info(
@@ -1785,17 +1813,15 @@ class TactusRuntime:
                     logger.info(
                         f"Agent '{agent_name}' using all available toolsets (no defaults configured)"
                     )
-            elif isinstance(agent_toolsets_config, list) and len(agent_toolsets_config) == 0:
-                # Explicitly empty list - no toolsets
+            elif isinstance(agent_tools_config, list) and len(agent_tools_config) == 0:
+                # Explicitly empty list - no tools
                 # Use None instead of [] to completely disable tool calling for Bedrock models
                 filtered_toolsets = None
-                logger.info(
-                    f"Agent '{agent_name}' has NO toolsets (explicitly empty - passing None)"
-                )
+                logger.info(f"Agent '{agent_name}' has NO tools (explicitly empty - passing None)")
             else:
                 # Parse toolset expressions
-                logger.info(f"Agent '{agent_name}' raw toolsets config: {agent_toolsets_config}")
-                filtered_toolsets = self._parse_toolset_expressions(agent_toolsets_config)
+                logger.info(f"Agent '{agent_name}' raw tools config: {agent_tools_config}")
+                filtered_toolsets = self._parse_toolset_expressions(agent_tools_config)
                 logger.info(f"Agent '{agent_name}' parsed toolsets: {filtered_toolsets}")
             # Append inline tools toolset if present
@@ -1834,13 +1860,21 @@ class TactusRuntime:
                 except Exception as e:
                     logger.warning(f"Failed to create output model for agent '{agent_name}': {e}")
             elif self.config.get("output"):
-                # Use procedure-level output schema
-                output_schema = self.config["output"]
-                try:
-                    self._create_output_model_from_schema(output_schema, f"{agent_name}Output")
-                    logger.info(f"Using procedure-level output schema for agent '{agent_name}'")
-                except Exception as e:
-                    logger.warning(f"Failed to create output model from procedure schema: {e}")
+                # Procedure-level output schemas apply to procedures, not agents.
+                # Only use them as a fallback for agent structured output when they are
+                # object-shaped (i.e., a dict of fields). Scalar procedure outputs
+                # (e.g., `output = field.string{...}`) are not agent output schemas.
+                procedure_output_schema = self.config["output"]
+                if (
+                    isinstance(procedure_output_schema, dict)
+                    and "type" not in procedure_output_schema
+                ):
+                    output_schema = procedure_output_schema
+                    try:
+                        self._create_output_model_from_schema(output_schema, f"{agent_name}Output")
+                        logger.info(f"Using procedure-level output schema for agent '{agent_name}'")
+                    except Exception as e:
+                        logger.warning(f"Failed to create output model from procedure schema: {e}")
             # Extract message history filter if configured
             message_history_filter = None
@@ -2239,22 +2273,6 @@ class TactusRuntime:
             logger.info(f"Injecting MessageHistory primitive: {self.message_history_primitive}")
             self.lua_sandbox.inject_primitive("MessageHistory", self.message_history_primitive)
-        if self.stage_primitive:
-            logger.info(f"Injecting Stage primitive: {self.stage_primitive}")
-            # Create wrapper to map 'is' (reserved keyword in Python) to 'is_current'
-            class StageWrapper:
-                def __init__(self, stage_primitive):
-                    self._stage = stage_primitive
-                def __getattr__(self, name):
-                    if name == "is":
-                        return self._stage.is_current
-                    return getattr(self._stage, name)
-            stage_wrapper = StageWrapper(self.stage_primitive)
-            self.lua_sandbox.inject_primitive("Stage", stage_wrapper)
         if self.json_primitive:
             logger.info(f"Injecting Json primitive: {self.json_primitive}")
             self.lua_sandbox.inject_primitive("Json", self.json_primitive)
@@ -2391,6 +2409,135 @@ class TactusRuntime:
             logger.error(f"Legacy procedure execution failed: {e}")
             raise
+    def _maybe_transform_script_mode_source(self, source: str) -> str:
+        """
+        Transform "script mode" source into an implicit Procedure wrapper.
+        Script mode allows:
+          input { ... }
+          output { ... }
+          -- declarations (Agent/Tool/Mocks/etc.)
+          -- executable code
+          return {...}
+        During parsing, the Lua chunk is executed to collect declarations, but agents
+        are not yet wired to toolsets/LLMs. Without transformation, top-level code
+        would execute too early. We split declaration blocks from executable code and
+        wrap the executable portion into an implicit `Procedure { function(input) ... end }`.
+        """
+        import re
+        # If an explicit Procedure exists (any syntax), do not transform.
+        # Examples:
+        #   Procedure { ... }
+        #   main = Procedure { ... }
+        #   Procedure "main" { ... }
+        #   main = Procedure "main" { ... }
+        if re.search(r"(?m)^\s*(?:[A-Za-z_][A-Za-z0-9_]*\s*=\s*)?Procedure\b", source):
+            return source  # explicit Procedure found: the text is returned untouched
+        # Detect script mode by top-level input/output declarations OR a top-level `return`.
+        # We intentionally treat simple "hello world" scripts as script-mode so agent/tool
+        # calls don't execute during the parse/declaration phase.
+        if not re.search(r"(?m)^\s*(input|output)\s*\{", source) and not re.search(
+            r"(?m)^\s*return\b", source
+        ):
+            return source  # no input/output block and no line starting with `return`
+        # Split into declaration prefix vs executable body.
+        decl_lines: list[str] = []
+        body_lines: list[str] = []
+        # Once we enter executable code, everything stays in the body.
+        in_body = False
+        brace_depth = 0  # running "{" minus "}" balance; > 0 means a declaration block is open
+        long_string_eq: str | None = None  # "=" run of the open long-bracket string, else None
+        decl_start = re.compile(
+            r"^\s*(?:"
+            r"input|output|Mocks|Agent|Toolset|Tool|Model|Module|Signature|LM|Dependency|Prompt|"
+            r"Specifications|Evaluation|Evaluations|"  # NOTE(review): singular "Specification" is not listed and \b prevents a prefix match — confirm intent
+            r"default_provider|default_model|return_prompt|error_prompt|status_prompt|async|"
+            r"max_depth|max_turns"
+            r")\b"
+        )
+        require_stmt = re.compile(r"^\s*(?:local\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=\s*require\(")  # `name = require(`, optionally `local`
+        assignment_decl = re.compile(  # `name = Agent ...` etc.; unlike require_stmt, no `local` prefix is accepted
+            r"^\s*[A-Za-z_][A-Za-z0-9_]*\s*=\s*(?:"
+            r"Agent|Toolset|Tool|Model|Module|Signature|LM|Dependency|Prompt"
+            r")\b"
+        )
+        long_string_open = re.compile(r"\[(=*)\[")  # `[[`, `[=[`, `[==[`, ...; group 1 is the "=" run
+        for line in source.splitlines():
+            if in_body:
+                body_lines.append(line)
+                continue
+            stripped = line.strip()
+            # If we're inside a Lua long-bracket string (e.g., Specification([[ ... ]]) / Specifications([[ ... ]]))
+            # keep consuming lines as declarations until we see the closing delimiter.
+            if long_string_eq is not None:
+                decl_lines.append(line)
+                if f"]{long_string_eq}]" in line:  # closer must carry the same number of "="
+                    long_string_eq = None
+                continue  # lines inside a long string skip the brace counting below
+            # If we're inside a declaration block, keep consuming until braces balance.
+            added_to_decl = False
+            if brace_depth > 0:
+                decl_lines.append(line)
+                added_to_decl = True
+            elif stripped == "" or stripped.startswith("--"):  # blank lines and `--` comments stay in the prefix
+                decl_lines.append(line)
+                added_to_decl = True
+            elif decl_start.match(line) or assignment_decl.match(line) or require_stmt.match(line):
+                decl_lines.append(line)
+                added_to_decl = True
+            else:
+                in_body = True  # first non-declaration line: it and every later line go to the body
+                body_lines.append(line)
+            # Track Lua long-bracket strings opened in the declaration prefix (e.g. Specification([[...]])).
+            # We only need a lightweight heuristic here; spec/eval blocks should be simple and well-formed.
+            if added_to_decl:
+                m = long_string_open.search(line)
+                if m:
+                    eq = m.group(1)
+                    # If the opening and closing are on the same line, don't enter long-string mode.
+                    if f"]{eq}]" not in line[m.end() :]:
+                        long_string_eq = eq
+            # Update brace depth based on a lightweight heuristic (sufficient for DSL blocks).
+            # This intentionally ignores Lua string/comment edge cases; declarations should be simple.
+            brace_depth += line.count("{") - line.count("}")
+            if brace_depth < 0:
+                brace_depth = 0  # clamp so a stray "}" cannot drive the balance negative
+        # If there is no executable code, nothing to wrap.
+        if not any(line.strip() for line in body_lines):
+            return source  # only declarations (or whitespace) found: original text is returned as-is
+        # Indent executable code inside the implicit procedure function.
+        indented_body = "\n".join(("    " + line) if line != "" else "" for line in body_lines)  # NOTE(review): also indents lines inside multi-line [[...]] strings in the body — confirm acceptable
+        transformed = "\n".join(
+            [
+                *decl_lines,
+                "",
+                "Procedure {",
+                "    function(input)",
+                indented_body,
+                "    end",
+                "}",
+                "",  # trailing empty item makes the joined text end with a newline
+            ]
+        )
+        return transformed
     def _process_template(self, template: str, context: Dict[str, Any]) -> str:
         """
         Process a template string with variable substitution.
@@ -2620,9 +2767,9 @@ class TactusRuntime:
                     "provider": agent.provider,
                     "model": agent.model,
                     "system_prompt": agent.system_prompt,
-                    # Use toolsets instead of tools (breaking change)
+                    # Tools control tool calling availability (tool/toolset references + expressions)
                     # Keep empty list as [] (not None) to preserve "explicitly no tools" intent
-                    "toolsets": agent.tools,
+                    "tools": agent.tools,
                     "max_turns": agent.max_turns,
                     "disable_streaming": agent.disable_streaming,
                 }
@@ -2634,8 +2781,8 @@ class TactusRuntime:
                 if agent.model_type is not None:
                     config["agents"][name]["model_type"] = agent.model_type
                 # Include inline tool definitions if present
-                if hasattr(agent, "inline_tool_defs") and agent.inline_tool_defs:
-                    config["agents"][name]["inline_tool_defs"] = agent.inline_tool_defs
+                if agent.inline_tools:
+                    config["agents"][name]["inline_tools"] = agent.inline_tools
                 if agent.initial_message:
                     config["agents"][name]["initial_message"] = agent.initial_message
                 if agent.output:
@@ -2671,14 +2818,6 @@ class TactusRuntime:
                 if hitl.options:
                     config["hitl"][name]["options"] = hitl.options
-        # Convert stages
-        if registry.stages:
-            # Handle case where stages is [[list]] instead of [list]
-            if len(registry.stages) == 1 and isinstance(registry.stages[0], list):
-                config["stages"] = registry.stages[0]
-            else:
-                config["stages"] = registry.stages
         # Convert prompts
         if registry.prompts:
             config["prompts"] = registry.prompts

tactus/dspy/agent.py CHANGED Viewed

@@ -231,7 +231,7 @@ class DSPyAgentHandle:
     def _prediction_to_value(self, prediction: TactusPrediction) -> Any:
         """
-        Convert a Prediction into a stable `result.value`.
+        Convert a Prediction into a stable `result.output`.
         Default behavior:
         - Prefer the `response` field when present (string)
@@ -275,7 +275,7 @@ class DSPyAgentHandle:
     ) -> TactusResult:
         """Wrap a Prediction into the standard TactusResult."""
         return TactusResult(
-            value=self._prediction_to_value(prediction),
+            output=self._prediction_to_value(prediction),
             usage=usage_stats,
             cost_stats=cost_stats,
         )
@@ -610,7 +610,7 @@ class DSPyAgentHandle:
         new_messages = []
         # Determine user message
-        user_message = opts.get("inject")
+        user_message = opts.get("message")
         if self._turn_count == 1 and not user_message and self.initial_message:
             user_message = self.initial_message
@@ -692,7 +692,7 @@ class DSPyAgentHandle:
         new_messages = []
         # Determine user message
-        user_message = opts.get("inject")
+        user_message = opts.get("message")
         if self._turn_count == 1 and not user_message and self.initial_message:
             user_message = self.initial_message
@@ -753,7 +753,7 @@ class DSPyAgentHandle:
                    Default field 'message' is used as the user message.
                    Additional fields are passed as context.
                    Can also include per-turn overrides like:
-                   - tools: List[str] - Tool names to use
+                   - tools: List[Any] - Tool/toolset references and toolset expressions to use
                    - temperature: float - Override temperature
                    - max_tokens: int - Override max_tokens
@@ -765,6 +765,11 @@ class DSPyAgentHandle:
             print(result.response)
         """
         logger.debug(f"Agent '{self.name}' invoked via __call__()")
+        # Convenience: allow shorthand string calls in Lua:
+        #   worker("Hello") == worker({message = "Hello"})
+        if isinstance(inputs, str):
+            inputs = {"message": inputs}
         inputs = inputs or {}
         # Convert Lua table to dict if needed
@@ -780,10 +785,10 @@ class DSPyAgentHandle:
         # Build turn options (keeping per-turn overrides like tools, temperature, etc.)
         opts = {}
         if message:
-            opts["inject"] = message
+            opts["message"] = message
         # Pass remaining fields - some are per-turn overrides, others are context
-        override_keys = {"tools", "toolsets", "temperature", "max_tokens"}
+        override_keys = {"tools", "temperature", "max_tokens"}
         for key in override_keys:
             if key in inputs:
                 opts[key] = inputs[key]
@@ -826,7 +831,7 @@ class DSPyAgentHandle:
             configure_lm(model_for_litellm, **config_kwargs)
         # Extract options
-        user_message = opts.get("inject")
+        user_message = opts.get("message")
         # Use initial_message on first turn if no message provided
         if self._turn_count == 1 and not user_message and self.initial_message:
@@ -896,16 +901,52 @@ class DSPyAgentHandle:
         # Get agent mock config from registry.agent_mocks
         mock_config = self.registry.agent_mocks[agent_name]
-        # Convert AgentMockConfig to format expected by _wrap_mock_response
-        # _wrap_mock_response expects: response (or message), tool_calls, data, usage
-        # We convert message -> response here for clarity
+        temporal_turns = getattr(mock_config, "temporal", None) or []
+        if temporal_turns:
+            injected = opts.get("message")
+            selected_turn = None
+            if injected is not None:
+                for turn in temporal_turns:
+                    if isinstance(turn, dict) and turn.get("when_message") == injected:
+                        selected_turn = turn
+                        break
+            if selected_turn is None:
+                idx = self._turn_count - 1  # 1-indexed turns
+                if idx < 0:
+                    idx = 0
+                if idx >= len(temporal_turns):
+                    idx = len(temporal_turns) - 1
+                selected_turn = temporal_turns[idx]
+            turn = selected_turn
+            if isinstance(turn, dict):
+                message = turn.get("message", mock_config.message)
+                tool_calls = turn.get("tool_calls", mock_config.tool_calls)
+                data = turn.get("data", mock_config.data)
+            else:
+                message = mock_config.message
+                tool_calls = mock_config.tool_calls
+                data = mock_config.data
+        else:
+            message = mock_config.message
+            tool_calls = mock_config.tool_calls
+            data = mock_config.data
+        # Convert AgentMockConfig to format expected by _wrap_mock_response.
+        # Important: we do NOT embed `data`/`usage` inside the prediction output by default.
+        # The canonical agent payload is `result.output`:
+        # - If the agent has an explicit output schema, we allow structured output via `data`.
+        # - Otherwise, `result.output` is the plain response string.
         mock_data = {
-            "response": mock_config.message,
-            "tool_calls": mock_config.tool_calls,
-            "data": mock_config.data,
-            "usage": mock_config.usage,
+            "response": message,
+            "tool_calls": tool_calls,
         }
+        if self.output_schema and data:
+            mock_data["data"] = data
         try:
             return self._wrap_mock_response(mock_data, opts)
         except Exception:
@@ -930,17 +971,19 @@ class DSPyAgentHandle:
         """
         from tactus.dspy.prediction import create_prediction
-        # Normalize mock data to match agent's output signature
-        # Mock data uses "message" field, but agent signature uses "response" field
-        normalized_data = dict(mock_data)
-        if "message" in normalized_data and "response" not in normalized_data:
-            normalized_data["response"] = normalized_data["message"]
+        response_text = None
+        if "response" in mock_data and isinstance(mock_data.get("response"), str):
+            response_text = mock_data["response"]
+        elif "message" in mock_data and isinstance(mock_data.get("message"), str):
+            response_text = mock_data["message"]
+        else:
+            response_text = ""
         # Track new messages for this turn
         new_messages = []
         # Determine user message
-        user_message = opts.get("inject")
+        user_message = opts.get("message")
         if self._turn_count == 1 and not user_message and self.initial_message:
             user_message = self.initial_message
@@ -951,24 +994,37 @@ class DSPyAgentHandle:
             self._history.add(user_msg)
         # Add assistant response to new_messages
-        if "response" in normalized_data:
-            assistant_msg = {"role": "assistant", "content": normalized_data["response"]}
+        if response_text:
+            assistant_msg = {"role": "assistant", "content": response_text}
             new_messages.append(assistant_msg)
             self._history.add(assistant_msg)
-        # Add message tracking to normalized data
-        normalized_data["__new_messages__"] = new_messages
-        normalized_data["__all_messages__"] = self._history.get()
+        prediction_fields: Dict[str, Any] = {}
+        tool_calls_list = mock_data.get("tool_calls", [])
+        if tool_calls_list:
+            prediction_fields["tool_calls"] = tool_calls_list
+        # If the agent has an explicit output schema, allow structured output via mock `data`.
+        # Otherwise default to plain string output.
+        data = mock_data.get("data")
+        if self.output_schema and isinstance(data, dict) and data:
+            prediction_fields.update(data)
+        else:
+            prediction_fields["response"] = response_text
+        # Add message tracking to prediction
+        prediction_fields["__new_messages__"] = new_messages
+        prediction_fields["__all_messages__"] = self._history.get()
         # Create prediction from the assembled prediction fields
-        result = create_prediction(**normalized_data)
+        result = create_prediction(**prediction_fields)
         # Record all tool calls from the mock
         # This allows mocks to trigger Tool.called(...) behavior
         # Use getattr since _tool_primitive is set externally by runtime
         tool_primitive = getattr(self, "_tool_primitive", None)
-        if "tool_calls" in normalized_data and tool_primitive:
-            tool_calls_list = normalized_data.get("tool_calls", [])
+        if tool_calls_list and tool_primitive:
             if isinstance(tool_calls_list, list):
                 for tool_call in tool_calls_list:
                     if isinstance(tool_call, dict) and "tool" in tool_call:
@@ -978,7 +1034,8 @@ class DSPyAgentHandle:
                         # For done tool, extract reason for result
                         if tool_name == "done":
                             reason = tool_args.get(
-                                "reason", normalized_data.get("response", "Task completed (mocked)")
+                                "reason",
+                                response_text or "Task completed (mocked)",
                             )
                             tool_result = {"status": "completed", "reason": reason, "tool": "done"}
                         else:

tactus/ide/server.py CHANGED Viewed

@@ -434,15 +434,6 @@ def create_app(initial_workspace: Optional[str] = None, frontend_dist_dir: Optio
                     "scenario_count": len(scenarios),
                 }
-            # Extract stages (flatten if nested)
-            stages_list = []
-            if registry.stages:
-                for stage in registry.stages:
-                    if isinstance(stage, list):
-                        stages_list.extend(stage)
-                    else:
-                        stages_list.append(stage)
             # Extract evaluations summary
             evaluations_data = None
             if registry.pydantic_evaluations:
@@ -488,7 +479,6 @@ def create_app(initial_workspace: Optional[str] = None, frontend_dist_dir: Optio
                 "toolsets": {name: toolset for name, toolset in registry.toolsets.items()},
                 "tools": sorted(list(all_tools)),
                 "specifications": specifications_data,
-                "stages": stages_list,
                 "evaluations": evaluations_data,
             }

tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl

tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl