tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tactus/core/runtime.py CHANGED
@@ -42,7 +42,6 @@ from tactus.primitives.human import HumanPrimitive
42
42
  from tactus.primitives.step import StepPrimitive, CheckpointPrimitive
43
43
  from tactus.primitives.log import LogPrimitive
44
44
  from tactus.primitives.message_history import MessageHistoryPrimitive
45
- from tactus.primitives.stage import StagePrimitive
46
45
  from tactus.primitives.json import JsonPrimitive
47
46
  from tactus.primitives.retry import RetryPrimitive
48
47
  from tactus.primitives.file import FilePrimitive
@@ -137,7 +136,6 @@ class TactusRuntime:
137
136
  self.step_primitive: Optional[StepPrimitive] = None
138
137
  self.checkpoint_primitive: Optional[CheckpointPrimitive] = None
139
138
  self.log_primitive: Optional[LogPrimitive] = None
140
- self.stage_primitive: Optional[StagePrimitive] = None
141
139
  self.json_primitive: Optional[JsonPrimitive] = None
142
140
  self.retry_primitive: Optional[RetryPrimitive] = None
143
141
  self.file_primitive: Optional[FilePrimitive] = None
@@ -160,6 +158,8 @@ class TactusRuntime:
160
158
 
161
159
  # Mock manager for testing
162
160
  self.mock_manager: Optional[Any] = None # MockManager instance
161
+ self.external_agent_mocks: Optional[dict[str, list[dict[str, Any]]]] = None
162
+ self.mock_all_agents: bool = False
163
163
 
164
164
  logger.info(f"TactusRuntime initialized for procedure {procedure_id}")
165
165
 
@@ -308,6 +308,10 @@ class TactusRuntime:
308
308
  if format == "lua":
309
309
  logger.info("Step 1: Parsing Lua DSL configuration")
310
310
 
311
+ # Script mode: wrap top-level executable code in an implicit main Procedure
312
+ # so agents/tools aren't executed during parsing.
313
+ source = self._maybe_transform_script_mode_source(source)
314
+
311
315
  # Pass placeholder_tool so tool() can return callable ToolHandles
312
316
  self.registry = self._parse_declarations(source, placeholder_tool)
313
317
  logger.info("Loaded procedure from Lua DSL")
@@ -325,6 +329,31 @@ class TactusRuntime:
325
329
  self.mock_manager.enable_mock(tool_name)
326
330
  logger.debug(f"Registered and enabled mock for tool '{tool_name}'")
327
331
 
332
+ # Apply external, per-scenario agent mocks (from BDD steps).
333
+ # These should take precedence over any `Mocks { ... }` declared in the .tac file.
334
+ if self.external_agent_mocks and self.registry:
335
+ from tactus.core.registry import AgentMockConfig
336
+
337
+ for agent_name, temporal_turns in self.external_agent_mocks.items():
338
+ if not isinstance(temporal_turns, list):
339
+ raise TactusRuntimeError(
340
+ f"External agent mocks for '{agent_name}' must be a list of turns"
341
+ )
342
+ self.registry.agent_mocks[agent_name] = AgentMockConfig(
343
+ temporal=temporal_turns
344
+ )
345
+
346
+ # If we're in mocked mode, ensure agents are mocked deterministically even if
347
+ # the .tac file doesn't declare `Mocks { ... }` for them.
348
+ if self.mock_all_agents and self.registry:
349
+ from tactus.core.registry import AgentMockConfig
350
+
351
+ for agent_name in self.registry.agents.keys():
352
+ if agent_name not in self.registry.agent_mocks:
353
+ self.registry.agent_mocks[agent_name] = AgentMockConfig(
354
+ message=f"Mocked response from {agent_name}"
355
+ )
356
+
328
357
  # Merge external config (from .tac.yml) into self.config
329
358
  # External config provides toolsets, default_toolsets, etc.
330
359
  if self.external_config:
@@ -398,10 +427,6 @@ class TactusRuntime:
398
427
  self.message_history_primitive = MessageHistoryPrimitive(
399
428
  message_history_manager=self.message_history_manager
400
429
  )
401
- declared_stages = self.config.get("stages", [])
402
- self.stage_primitive = StagePrimitive(
403
- declared_stages=declared_stages, lua_sandbox=self.lua_sandbox
404
- )
405
430
  self.json_primitive = JsonPrimitive(lua_sandbox=self.lua_sandbox)
406
431
  self.retry_primitive = RetryPrimitive()
407
432
  self.file_primitive = FilePrimitive(execution_context=self.execution_context)
@@ -1419,11 +1444,13 @@ class TactusRuntime:
1419
1444
  # Handle tools list (can be tool names or inline definitions)
1420
1445
  tools_list = definition["tools"]
1421
1446
 
1422
- # Check if we have inline tool definitions (dicts with 'name' and 'handler')
1447
+ # Check if we have inline tool definitions (dicts with a Lua handler)
1423
1448
  has_inline_tools = False
1424
1449
  if isinstance(tools_list, list):
1425
1450
  for item in tools_list:
1426
- if isinstance(item, dict) and "handler" in item:
1451
+ if isinstance(item, dict) and (
1452
+ "handler" in item or (1 in item and callable(item.get(1)))
1453
+ ):
1427
1454
  has_inline_tools = True
1428
1455
  break
1429
1456
 
@@ -1714,14 +1741,17 @@ class TactusRuntime:
1714
1741
  f"Agent '{agent_name}' using provider '{provider_name}' with model '{model_id}'"
1715
1742
  )
1716
1743
 
1717
- # Handle inline Lua function tools
1744
+ # Handle inline Lua function tools (agent.inline_tools)
1718
1745
  inline_tools_toolset = None
1719
- if "inline_tool_defs" in agent_config and agent_config["inline_tool_defs"]:
1720
- tools_spec = agent_config["inline_tool_defs"]
1746
+ if "inline_tools" in agent_config and agent_config["inline_tools"]:
1747
+ tools_spec = agent_config["inline_tools"]
1721
1748
  # These are inline tool definitions (dicts with a 'handler' key or a callable at index 1)
1722
1749
  if isinstance(tools_spec, list):
1723
1750
  inline_tool_specs = [
1724
- t for t in tools_spec if isinstance(t, dict) and "handler" in t
1751
+ t
1752
+ for t in tools_spec
1753
+ if isinstance(t, dict)
1754
+ and ("handler" in t or (1 in t and callable(t.get(1))))
1725
1755
  ]
1726
1756
  if inline_tool_specs:
1727
1757
  # These are inline Lua function tools
@@ -1742,38 +1772,36 @@ class TactusRuntime:
1742
1772
  f"Could not import LuaToolsAdapter for agent '{agent_name}': {e}"
1743
1773
  )
1744
1774
 
1745
- # Get toolsets for this agent
1775
+ # Get tools (tool/toolset references) for this agent
1746
1776
  # Use a sentinel value to distinguish "not present" from "present but None/empty"
1747
1777
  _MISSING = object()
1748
- agent_toolsets_config = agent_config.get("toolsets", _MISSING)
1778
+ agent_tools_config = agent_config.get("tools", _MISSING)
1749
1779
 
1750
1780
  # Debug log
1751
1781
  logger.debug(
1752
- f"Agent '{agent_name}' raw toolsets config: {agent_toolsets_config}, type: {type(agent_toolsets_config)}"
1782
+ f"Agent '{agent_name}' raw tools config: {agent_tools_config}, type: {type(agent_tools_config)}"
1753
1783
  )
1754
1784
 
1755
1785
  # Convert Lua table to Python list if needed
1756
1786
  if (
1757
- agent_toolsets_config is not _MISSING
1758
- and agent_toolsets_config is not None
1759
- and hasattr(agent_toolsets_config, "__len__")
1787
+ agent_tools_config is not _MISSING
1788
+ and agent_tools_config is not None
1789
+ and hasattr(agent_tools_config, "__len__")
1760
1790
  ):
1761
1791
  try:
1762
1792
  # Try to convert Lua table to list
1763
- agent_toolsets_config = (
1764
- list(agent_toolsets_config.values())
1765
- if hasattr(agent_toolsets_config, "values")
1766
- else list(agent_toolsets_config)
1767
- )
1768
- logger.debug(
1769
- f"Agent '{agent_name}' converted toolsets to: {agent_toolsets_config}"
1793
+ agent_tools_config = (
1794
+ list(agent_tools_config.values())
1795
+ if hasattr(agent_tools_config, "values")
1796
+ else list(agent_tools_config)
1770
1797
  )
1798
+ logger.debug(f"Agent '{agent_name}' converted tools to: {agent_tools_config}")
1771
1799
  except (TypeError, AttributeError):
1772
1800
  # If conversion fails, leave as-is
1773
1801
  pass
1774
1802
 
1775
- if agent_toolsets_config is _MISSING:
1776
- # No toolsets key present - use default toolsets if configured, otherwise all
1803
+ if agent_tools_config is _MISSING:
1804
+ # No tools key present - use default toolsets if configured, otherwise all
1777
1805
  if default_toolset_names:
1778
1806
  filtered_toolsets = self._parse_toolset_expressions(default_toolset_names)
1779
1807
  logger.info(
@@ -1785,17 +1813,15 @@ class TactusRuntime:
1785
1813
  logger.info(
1786
1814
  f"Agent '{agent_name}' using all available toolsets (no defaults configured)"
1787
1815
  )
1788
- elif isinstance(agent_toolsets_config, list) and len(agent_toolsets_config) == 0:
1789
- # Explicitly empty list - no toolsets
1816
+ elif isinstance(agent_tools_config, list) and len(agent_tools_config) == 0:
1817
+ # Explicitly empty list - no tools
1790
1818
  # Use None instead of [] to completely disable tool calling for Bedrock models
1791
1819
  filtered_toolsets = None
1792
- logger.info(
1793
- f"Agent '{agent_name}' has NO toolsets (explicitly empty - passing None)"
1794
- )
1820
+ logger.info(f"Agent '{agent_name}' has NO tools (explicitly empty - passing None)")
1795
1821
  else:
1796
1822
  # Parse toolset expressions
1797
- logger.info(f"Agent '{agent_name}' raw toolsets config: {agent_toolsets_config}")
1798
- filtered_toolsets = self._parse_toolset_expressions(agent_toolsets_config)
1823
+ logger.info(f"Agent '{agent_name}' raw tools config: {agent_tools_config}")
1824
+ filtered_toolsets = self._parse_toolset_expressions(agent_tools_config)
1799
1825
  logger.info(f"Agent '{agent_name}' parsed toolsets: {filtered_toolsets}")
1800
1826
 
1801
1827
  # Append inline tools toolset if present
@@ -1834,13 +1860,21 @@ class TactusRuntime:
1834
1860
  except Exception as e:
1835
1861
  logger.warning(f"Failed to create output model for agent '{agent_name}': {e}")
1836
1862
  elif self.config.get("output"):
1837
- # Use procedure-level output schema
1838
- output_schema = self.config["output"]
1839
- try:
1840
- self._create_output_model_from_schema(output_schema, f"{agent_name}Output")
1841
- logger.info(f"Using procedure-level output schema for agent '{agent_name}'")
1842
- except Exception as e:
1843
- logger.warning(f"Failed to create output model from procedure schema: {e}")
1863
+ # Procedure-level output schemas apply to procedures, not agents.
1864
+ # Only use them as a fallback for agent structured output when they are
1865
+ # object-shaped (i.e., a dict of fields). Scalar procedure outputs
1866
+ # (e.g., `output = field.string{...}`) are not agent output schemas.
1867
+ procedure_output_schema = self.config["output"]
1868
+ if (
1869
+ isinstance(procedure_output_schema, dict)
1870
+ and "type" not in procedure_output_schema
1871
+ ):
1872
+ output_schema = procedure_output_schema
1873
+ try:
1874
+ self._create_output_model_from_schema(output_schema, f"{agent_name}Output")
1875
+ logger.info(f"Using procedure-level output schema for agent '{agent_name}'")
1876
+ except Exception as e:
1877
+ logger.warning(f"Failed to create output model from procedure schema: {e}")
1844
1878
 
1845
1879
  # Extract message history filter if configured
1846
1880
  message_history_filter = None
@@ -2239,22 +2273,6 @@ class TactusRuntime:
2239
2273
  logger.info(f"Injecting MessageHistory primitive: {self.message_history_primitive}")
2240
2274
  self.lua_sandbox.inject_primitive("MessageHistory", self.message_history_primitive)
2241
2275
 
2242
- if self.stage_primitive:
2243
- logger.info(f"Injecting Stage primitive: {self.stage_primitive}")
2244
-
2245
- # Create wrapper to map 'is' (reserved keyword in Python) to 'is_current'
2246
- class StageWrapper:
2247
- def __init__(self, stage_primitive):
2248
- self._stage = stage_primitive
2249
-
2250
- def __getattr__(self, name):
2251
- if name == "is":
2252
- return self._stage.is_current
2253
- return getattr(self._stage, name)
2254
-
2255
- stage_wrapper = StageWrapper(self.stage_primitive)
2256
- self.lua_sandbox.inject_primitive("Stage", stage_wrapper)
2257
-
2258
2276
  if self.json_primitive:
2259
2277
  logger.info(f"Injecting Json primitive: {self.json_primitive}")
2260
2278
  self.lua_sandbox.inject_primitive("Json", self.json_primitive)
@@ -2391,6 +2409,135 @@ class TactusRuntime:
2391
2409
  logger.error(f"Legacy procedure execution failed: {e}")
2392
2410
  raise
2393
2411
 
2412
+ def _maybe_transform_script_mode_source(self, source: str) -> str:
2413
+ """
2414
+ Transform "script mode" source into an implicit Procedure wrapper.
2415
+
2416
+ Script mode allows:
2417
+ input { ... }
2418
+ output { ... }
2419
+ -- declarations (Agent/Tool/Mocks/etc.)
2420
+ -- executable code
2421
+ return {...}
2422
+
2423
+ During parsing, the Lua chunk is executed to collect declarations, but agents
2424
+ are not yet wired to toolsets/LLMs. Without transformation, top-level code
2425
+ would execute too early. We split declaration blocks from executable code and
2426
+ wrap the executable portion into an implicit `Procedure { function(input) ... end }`.
2427
+ """
2428
+ import re
2429
+
2430
+ # If an explicit Procedure declaration starts a line (bare or assigned to a name), do not transform.
2431
+ # Examples:
2432
+ # Procedure { ... }
2433
+ # main = Procedure { ... }
2434
+ # Procedure "main" { ... }
2435
+ # main = Procedure "main" { ... }
2436
+ if re.search(r"(?m)^\s*(?:[A-Za-z_][A-Za-z0-9_]*\s*=\s*)?Procedure\b", source):
2437
+ return source
2438
+
2439
+ # Detect script mode by top-level input/output declarations OR a top-level `return`.
2440
+ # We intentionally treat simple "hello world" scripts as script-mode so agent/tool
2441
+ # calls don't execute during the parse/declaration phase.
2442
+ if not re.search(r"(?m)^\s*(input|output)\s*\{", source) and not re.search(
2443
+ r"(?m)^\s*return\b", source
2444
+ ):
2445
+ return source
2446
+
2447
+ # Split into declaration prefix vs executable body.
2448
+ decl_lines: list[str] = []
2449
+ body_lines: list[str] = []
2450
+
2451
+ # Once we enter executable code, everything stays in the body.
2452
+ in_body = False
2453
+ brace_depth = 0
2454
+ long_string_eq: str | None = None
2455
+
2456
+ decl_start = re.compile(
2457
+ r"^\s*(?:"
2458
+ r"input|output|Mocks|Agent|Toolset|Tool|Model|Module|Signature|LM|Dependency|Prompt|"
2459
+ r"Specifications|Evaluation|Evaluations|"
2460
+ r"default_provider|default_model|return_prompt|error_prompt|status_prompt|async|"
2461
+ r"max_depth|max_turns"
2462
+ r")\b"
2463
+ )
2464
+ require_stmt = re.compile(r"^\s*(?:local\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=\s*require\(")
2465
+ assignment_decl = re.compile(
2466
+ r"^\s*[A-Za-z_][A-Za-z0-9_]*\s*=\s*(?:"
2467
+ r"Agent|Toolset|Tool|Model|Module|Signature|LM|Dependency|Prompt"
2468
+ r")\b"
2469
+ )
2470
+
2471
+ long_string_open = re.compile(r"\[(=*)\[")
2472
+
2473
+ for line in source.splitlines():
2474
+ if in_body:
2475
+ body_lines.append(line)
2476
+ continue
2477
+
2478
+ stripped = line.strip()
2479
+
2480
+ # If we're inside a Lua long-bracket string (e.g., Specification([[ ... ]]) / Specifications([[ ... ]]))
2481
+ # keep consuming lines as declarations until we see the closing delimiter.
2482
+ if long_string_eq is not None:
2483
+ decl_lines.append(line)
2484
+ if f"]{long_string_eq}]" in line:
2485
+ long_string_eq = None
2486
+ continue
2487
+
2488
+ # If we're inside a declaration block, keep consuming until braces balance.
2489
+ added_to_decl = False
2490
+ if brace_depth > 0:
2491
+ decl_lines.append(line)
2492
+ added_to_decl = True
2493
+ elif stripped == "" or stripped.startswith("--"):
2494
+ decl_lines.append(line)
2495
+ added_to_decl = True
2496
+ elif decl_start.match(line) or assignment_decl.match(line) or require_stmt.match(line):
2497
+ decl_lines.append(line)
2498
+ added_to_decl = True
2499
+ else:
2500
+ in_body = True
2501
+ body_lines.append(line)
2502
+
2503
+ # Track Lua long-bracket strings opened in the declaration prefix (e.g. Specification([[...]])).
2504
+ # We only need a lightweight heuristic here; spec/eval blocks should be simple and well-formed.
2505
+ if added_to_decl:
2506
+ m = long_string_open.search(line)
2507
+ if m:
2508
+ eq = m.group(1)
2509
+ # If the opening and closing are on the same line, don't enter long-string mode.
2510
+ if f"]{eq}]" not in line[m.end() :]:
2511
+ long_string_eq = eq
2512
+
2513
+ # Update brace depth based on a lightweight heuristic (sufficient for DSL blocks).
2514
+ # This intentionally ignores Lua string/comment edge cases; declarations should be simple.
2515
+ brace_depth += line.count("{") - line.count("}")
2516
+ if brace_depth < 0:
2517
+ brace_depth = 0
2518
+
2519
+ # If there is no executable code, nothing to wrap.
2520
+ if not any(line.strip() for line in body_lines):
2521
+ return source
2522
+
2523
+ # Indent executable code inside the implicit procedure function.
2524
+ indented_body = "\n".join((" " + line) if line != "" else "" for line in body_lines)
2525
+
2526
+ transformed = "\n".join(
2527
+ [
2528
+ *decl_lines,
2529
+ "",
2530
+ "Procedure {",
2531
+ " function(input)",
2532
+ indented_body,
2533
+ " end",
2534
+ "}",
2535
+ "",
2536
+ ]
2537
+ )
2538
+
2539
+ return transformed
2540
+
2394
2541
  def _process_template(self, template: str, context: Dict[str, Any]) -> str:
2395
2542
  """
2396
2543
  Process a template string with variable substitution.
@@ -2620,9 +2767,9 @@ class TactusRuntime:
2620
2767
  "provider": agent.provider,
2621
2768
  "model": agent.model,
2622
2769
  "system_prompt": agent.system_prompt,
2623
- # Use toolsets instead of tools (breaking change)
2770
+ # Tools control tool calling availability (tool/toolset references + expressions)
2624
2771
  # Keep empty list as [] (not None) to preserve "explicitly no tools" intent
2625
- "toolsets": agent.tools,
2772
+ "tools": agent.tools,
2626
2773
  "max_turns": agent.max_turns,
2627
2774
  "disable_streaming": agent.disable_streaming,
2628
2775
  }
@@ -2634,8 +2781,8 @@ class TactusRuntime:
2634
2781
  if agent.model_type is not None:
2635
2782
  config["agents"][name]["model_type"] = agent.model_type
2636
2783
  # Include inline tool definitions if present
2637
- if hasattr(agent, "inline_tool_defs") and agent.inline_tool_defs:
2638
- config["agents"][name]["inline_tool_defs"] = agent.inline_tool_defs
2784
+ if agent.inline_tools:
2785
+ config["agents"][name]["inline_tools"] = agent.inline_tools
2639
2786
  if agent.initial_message:
2640
2787
  config["agents"][name]["initial_message"] = agent.initial_message
2641
2788
  if agent.output:
@@ -2671,14 +2818,6 @@ class TactusRuntime:
2671
2818
  if hitl.options:
2672
2819
  config["hitl"][name]["options"] = hitl.options
2673
2820
 
2674
- # Convert stages
2675
- if registry.stages:
2676
- # Handle case where stages is [[list]] instead of [list]
2677
- if len(registry.stages) == 1 and isinstance(registry.stages[0], list):
2678
- config["stages"] = registry.stages[0]
2679
- else:
2680
- config["stages"] = registry.stages
2681
-
2682
2821
  # Convert prompts
2683
2822
  if registry.prompts:
2684
2823
  config["prompts"] = registry.prompts
tactus/dspy/agent.py CHANGED
@@ -231,7 +231,7 @@ class DSPyAgentHandle:
231
231
 
232
232
  def _prediction_to_value(self, prediction: TactusPrediction) -> Any:
233
233
  """
234
- Convert a Prediction into a stable `result.value`.
234
+ Convert a Prediction into a stable `result.output`.
235
235
 
236
236
  Default behavior:
237
237
  - Prefer the `response` field when present (string)
@@ -275,7 +275,7 @@ class DSPyAgentHandle:
275
275
  ) -> TactusResult:
276
276
  """Wrap a Prediction into the standard TactusResult."""
277
277
  return TactusResult(
278
- value=self._prediction_to_value(prediction),
278
+ output=self._prediction_to_value(prediction),
279
279
  usage=usage_stats,
280
280
  cost_stats=cost_stats,
281
281
  )
@@ -610,7 +610,7 @@ class DSPyAgentHandle:
610
610
  new_messages = []
611
611
 
612
612
  # Determine user message
613
- user_message = opts.get("inject")
613
+ user_message = opts.get("message")
614
614
  if self._turn_count == 1 and not user_message and self.initial_message:
615
615
  user_message = self.initial_message
616
616
 
@@ -692,7 +692,7 @@ class DSPyAgentHandle:
692
692
  new_messages = []
693
693
 
694
694
  # Determine user message
695
- user_message = opts.get("inject")
695
+ user_message = opts.get("message")
696
696
  if self._turn_count == 1 and not user_message and self.initial_message:
697
697
  user_message = self.initial_message
698
698
 
@@ -753,7 +753,7 @@ class DSPyAgentHandle:
753
753
  Default field 'message' is used as the user message.
754
754
  Additional fields are passed as context.
755
755
  Can also include per-turn overrides like:
756
- - tools: List[str] - Tool names to use
756
+ - tools: List[Any] - Tool/toolset references and toolset expressions to use
757
757
  - temperature: float - Override temperature
758
758
  - max_tokens: int - Override max_tokens
759
759
 
@@ -765,6 +765,11 @@ class DSPyAgentHandle:
765
765
  print(result.response)
766
766
  """
767
767
  logger.debug(f"Agent '{self.name}' invoked via __call__()")
768
+ # Convenience: allow shorthand string calls in Lua:
769
+ # worker("Hello") == worker({message = "Hello"})
770
+ if isinstance(inputs, str):
771
+ inputs = {"message": inputs}
772
+
768
773
  inputs = inputs or {}
769
774
 
770
775
  # Convert Lua table to dict if needed
@@ -780,10 +785,10 @@ class DSPyAgentHandle:
780
785
  # Build turn options (keeping per-turn overrides like tools, temperature, etc.)
781
786
  opts = {}
782
787
  if message:
783
- opts["inject"] = message
788
+ opts["message"] = message
784
789
 
785
790
  # Pass remaining fields - some are per-turn overrides, others are context
786
- override_keys = {"tools", "toolsets", "temperature", "max_tokens"}
791
+ override_keys = {"tools", "temperature", "max_tokens"}
787
792
  for key in override_keys:
788
793
  if key in inputs:
789
794
  opts[key] = inputs[key]
@@ -826,7 +831,7 @@ class DSPyAgentHandle:
826
831
  configure_lm(model_for_litellm, **config_kwargs)
827
832
 
828
833
  # Extract options
829
- user_message = opts.get("inject")
834
+ user_message = opts.get("message")
830
835
 
831
836
  # Use initial_message on first turn if no message provided
832
837
  if self._turn_count == 1 and not user_message and self.initial_message:
@@ -896,16 +901,52 @@ class DSPyAgentHandle:
896
901
  # Get agent mock config from registry.agent_mocks
897
902
  mock_config = self.registry.agent_mocks[agent_name]
898
903
 
899
- # Convert AgentMockConfig to format expected by _wrap_mock_response
900
- # _wrap_mock_response expects: response (or message), tool_calls, data, usage
901
- # We convert message -> response here for clarity
904
+ temporal_turns = getattr(mock_config, "temporal", None) or []
905
+ if temporal_turns:
906
+ injected = opts.get("message")
907
+
908
+ selected_turn = None
909
+ if injected is not None:
910
+ for turn in temporal_turns:
911
+ if isinstance(turn, dict) and turn.get("when_message") == injected:
912
+ selected_turn = turn
913
+ break
914
+
915
+ if selected_turn is None:
916
+ idx = self._turn_count - 1 # 1-indexed turns
917
+ if idx < 0:
918
+ idx = 0
919
+ if idx >= len(temporal_turns):
920
+ idx = len(temporal_turns) - 1
921
+ selected_turn = temporal_turns[idx]
922
+
923
+ turn = selected_turn
924
+ if isinstance(turn, dict):
925
+ message = turn.get("message", mock_config.message)
926
+ tool_calls = turn.get("tool_calls", mock_config.tool_calls)
927
+ data = turn.get("data", mock_config.data)
928
+ else:
929
+ message = mock_config.message
930
+ tool_calls = mock_config.tool_calls
931
+ data = mock_config.data
932
+ else:
933
+ message = mock_config.message
934
+ tool_calls = mock_config.tool_calls
935
+ data = mock_config.data
936
+
937
+ # Convert AgentMockConfig to format expected by _wrap_mock_response.
938
+ # Important: we do NOT embed `data`/`usage` inside the prediction output by default.
939
+ # The canonical agent payload is `result.output`:
940
+ # - If the agent has an explicit output schema, we allow structured output via `data`.
941
+ # - Otherwise, `result.output` is the plain response string.
902
942
  mock_data = {
903
- "response": mock_config.message,
904
- "tool_calls": mock_config.tool_calls,
905
- "data": mock_config.data,
906
- "usage": mock_config.usage,
943
+ "response": message,
944
+ "tool_calls": tool_calls,
907
945
  }
908
946
 
947
+ if self.output_schema and data:
948
+ mock_data["data"] = data
949
+
909
950
  try:
910
951
  return self._wrap_mock_response(mock_data, opts)
911
952
  except Exception:
@@ -930,17 +971,19 @@ class DSPyAgentHandle:
930
971
  """
931
972
  from tactus.dspy.prediction import create_prediction
932
973
 
933
- # Normalize mock data to match agent's output signature
934
- # Mock data uses "message" field, but agent signature uses "response" field
935
- normalized_data = dict(mock_data)
936
- if "message" in normalized_data and "response" not in normalized_data:
937
- normalized_data["response"] = normalized_data["message"]
974
+ response_text = None
975
+ if "response" in mock_data and isinstance(mock_data.get("response"), str):
976
+ response_text = mock_data["response"]
977
+ elif "message" in mock_data and isinstance(mock_data.get("message"), str):
978
+ response_text = mock_data["message"]
979
+ else:
980
+ response_text = ""
938
981
 
939
982
  # Track new messages for this turn
940
983
  new_messages = []
941
984
 
942
985
  # Determine user message
943
- user_message = opts.get("inject")
986
+ user_message = opts.get("message")
944
987
  if self._turn_count == 1 and not user_message and self.initial_message:
945
988
  user_message = self.initial_message
946
989
 
@@ -951,24 +994,37 @@ class DSPyAgentHandle:
951
994
  self._history.add(user_msg)
952
995
 
953
996
  # Add assistant response to new_messages
954
- if "response" in normalized_data:
955
- assistant_msg = {"role": "assistant", "content": normalized_data["response"]}
997
+ if response_text:
998
+ assistant_msg = {"role": "assistant", "content": response_text}
956
999
  new_messages.append(assistant_msg)
957
1000
  self._history.add(assistant_msg)
958
1001
 
959
- # Add message tracking to normalized data
960
- normalized_data["__new_messages__"] = new_messages
961
- normalized_data["__all_messages__"] = self._history.get()
1002
+ prediction_fields: Dict[str, Any] = {}
1003
+
1004
+ tool_calls_list = mock_data.get("tool_calls", [])
1005
+ if tool_calls_list:
1006
+ prediction_fields["tool_calls"] = tool_calls_list
1007
+
1008
+ # If the agent has an explicit output schema, allow structured output via mock `data`.
1009
+ # Otherwise default to plain string output.
1010
+ data = mock_data.get("data")
1011
+ if self.output_schema and isinstance(data, dict) and data:
1012
+ prediction_fields.update(data)
1013
+ else:
1014
+ prediction_fields["response"] = response_text
1015
+
1016
+ # Add message tracking to prediction
1017
+ prediction_fields["__new_messages__"] = new_messages
1018
+ prediction_fields["__all_messages__"] = self._history.get()
962
1019
 
963
1020
  # Create prediction from the assembled prediction fields
964
- result = create_prediction(**normalized_data)
1021
+ result = create_prediction(**prediction_fields)
965
1022
 
966
1023
  # Record all tool calls from the mock
967
1024
  # This allows mocks to trigger Tool.called(...) behavior
968
1025
  # Use getattr since _tool_primitive is set externally by runtime
969
1026
  tool_primitive = getattr(self, "_tool_primitive", None)
970
- if "tool_calls" in normalized_data and tool_primitive:
971
- tool_calls_list = normalized_data.get("tool_calls", [])
1027
+ if tool_calls_list and tool_primitive:
972
1028
  if isinstance(tool_calls_list, list):
973
1029
  for tool_call in tool_calls_list:
974
1030
  if isinstance(tool_call, dict) and "tool" in tool_call:
@@ -978,7 +1034,8 @@ class DSPyAgentHandle:
978
1034
  # For done tool, extract reason for result
979
1035
  if tool_name == "done":
980
1036
  reason = tool_args.get(
981
- "reason", normalized_data.get("response", "Task completed (mocked)")
1037
+ "reason",
1038
+ response_text or "Task completed (mocked)",
982
1039
  )
983
1040
  tool_result = {"status": "completed", "reason": reason, "tool": "done"}
984
1041
  else:
tactus/ide/server.py CHANGED
@@ -434,15 +434,6 @@ def create_app(initial_workspace: Optional[str] = None, frontend_dist_dir: Optio
434
434
  "scenario_count": len(scenarios),
435
435
  }
436
436
 
437
- # Extract stages (flatten if nested)
438
- stages_list = []
439
- if registry.stages:
440
- for stage in registry.stages:
441
- if isinstance(stage, list):
442
- stages_list.extend(stage)
443
- else:
444
- stages_list.append(stage)
445
-
446
437
  # Extract evaluations summary
447
438
  evaluations_data = None
448
439
  if registry.pydantic_evaluations:
@@ -488,7 +479,6 @@ def create_app(initial_workspace: Optional[str] = None, frontend_dist_dir: Optio
488
479
  "toolsets": {name: toolset for name, toolset in registry.toolsets.items()},
489
480
  "tools": sorted(list(all_tools)),
490
481
  "specifications": specifications_data,
491
- "stages": stages_list,
492
482
  "evaluations": evaluations_data,
493
483
  }
494
484