tactus-0.37.0-py3-none-any.whl → tactus-0.38.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. tactus/__init__.py +1 -1
  2. tactus/adapters/channels/base.py +2 -0
  3. tactus/cli/app.py +212 -57
  4. tactus/core/compaction.py +17 -0
  5. tactus/core/context_assembler.py +73 -0
  6. tactus/core/context_models.py +41 -0
  7. tactus/core/dsl_stubs.py +557 -17
  8. tactus/core/exceptions.py +8 -0
  9. tactus/core/execution_context.py +1 -1
  10. tactus/core/mocking.py +12 -0
  11. tactus/core/registry.py +142 -0
  12. tactus/core/retrieval.py +317 -0
  13. tactus/core/retriever_tasks.py +30 -0
  14. tactus/core/runtime.py +388 -74
  15. tactus/dspy/agent.py +143 -82
  16. tactus/dspy/config.py +16 -0
  17. tactus/dspy/module.py +12 -1
  18. tactus/ide/coding_assistant.py +2 -2
  19. tactus/primitives/handles.py +79 -7
  20. tactus/sandbox/config.py +1 -1
  21. tactus/sandbox/container_runner.py +2 -0
  22. tactus/sandbox/entrypoint.py +51 -8
  23. tactus/sandbox/protocol.py +5 -0
  24. tactus/stdlib/README.md +10 -1
  25. tactus/stdlib/biblicus/__init__.py +3 -0
  26. tactus/stdlib/biblicus/text.py +189 -0
  27. tactus/stdlib/tac/biblicus/text.tac +32 -0
  28. tactus/stdlib/tac/tactus/biblicus.spec.tac +179 -0
  29. tactus/stdlib/tac/tactus/corpora/base.tac +42 -0
  30. tactus/stdlib/tac/tactus/corpora/filesystem.tac +5 -0
  31. tactus/stdlib/tac/tactus/retrievers/base.tac +37 -0
  32. tactus/stdlib/tac/tactus/retrievers/embedding_index_file.tac +6 -0
  33. tactus/stdlib/tac/tactus/retrievers/embedding_index_inmemory.tac +6 -0
  34. tactus/stdlib/tac/tactus/retrievers/index.md +137 -0
  35. tactus/stdlib/tac/tactus/retrievers/init.tac +11 -0
  36. tactus/stdlib/tac/tactus/retrievers/sqlite_full_text_search.tac +6 -0
  37. tactus/stdlib/tac/tactus/retrievers/tf_vector.tac +6 -0
  38. tactus/testing/behave_integration.py +2 -0
  39. tactus/testing/context.py +4 -0
  40. tactus/validation/semantic_visitor.py +357 -6
  41. tactus/validation/validator.py +142 -2
  42. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/METADATA +3 -2
  43. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/RECORD +46 -28
  44. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/WHEEL +0 -0
  45. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/entry_points.txt +0 -0
  46. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/licenses/LICENSE +0 -0
tactus/__init__.py CHANGED
@@ -5,7 +5,7 @@ Tactus provides a declarative workflow engine for AI agents with pluggable
5
5
  backends for storage, HITL, and chat recording.
6
6
  """
7
7
 
8
- __version__ = "0.37.0"
8
+ __version__ = "0.38.0"
9
9
 
10
10
  # Core exports
11
11
  from tactus.core.runtime import TactusRuntime
tactus/adapters/channels/base.py CHANGED
@@ -198,4 +198,6 @@ class InProcessChannel(ABC):
198
198
  """
199
199
  if self._response_queue is None:
200
200
  loop.call_soon_threadsafe(self._ensure_asyncio_primitives)
201
+ loop.call_soon_threadsafe(lambda: self._response_queue.put_nowait(response))
202
+ return
201
203
  loop.call_soon_threadsafe(self._response_queue.put_nowait, response)
tactus/cli/app.py CHANGED
@@ -5,6 +5,9 @@ Main entry point for the Tactus command-line interface.
5
5
  Provides commands for running, validating, and testing workflows.
6
6
  """
7
7
 
8
+ # Future annotations prevent runtime evaluation of PEP 604 unions on Python 3.9.
9
+ from __future__ import annotations
10
+
8
11
  # Disable Pydantic plugins for PyInstaller builds
9
12
  # This prevents logfire (and other plugins) from being loaded via Pydantic's plugin system
10
13
  # which causes errors when trying to inspect source code in frozen apps
@@ -61,6 +64,13 @@ def main_callback(
61
64
 
62
65
  # If no subcommand was invoked and version flag not set, show help
63
66
  if ctx.invoked_subcommand is None:
67
+ if getattr(ctx, "args", None) and ctx.args[0].endswith((".tac", ".lua")):
68
+ workflow_file = Path(ctx.args[0])
69
+ task_name = None
70
+ if len(ctx.args) >= 2 and not ctx.args[1].startswith("-"):
71
+ task_name = ctx.args[1]
72
+ ctx.invoke(run, workflow_file=workflow_file, task=task_name)
73
+ raise typer.Exit()
64
74
  console.print(ctx.get_help())
65
75
  raise typer.Exit()
66
76
 
@@ -179,10 +189,11 @@ def setup_logging(
179
189
  verbose: bool = False,
180
190
  log_level: Optional[str] = None,
181
191
  log_format: str = "rich",
192
+ debug: bool = False,
182
193
  ) -> None:
183
194
  """Setup CLI logging (level + format)."""
184
195
  if log_level is None:
185
- level = logging.DEBUG if verbose else logging.INFO
196
+ level = logging.DEBUG if (verbose or debug) else logging.INFO
186
197
  else:
187
198
  key = str(log_level).strip().lower()
188
199
  if key not in _LOG_LEVELS:
@@ -198,6 +209,10 @@ def setup_logging(
198
209
  f"Invalid --log-format '{log_format}'. Use one of: {', '.join(sorted(_LOG_FORMATS))}"
199
210
  )
200
211
 
212
+ if debug:
213
+ os.environ["TACTUS_TRACE_LLM_MESSAGES"] = "1"
214
+ os.environ["TACTUS_TRACE_CONTEXT"] = "1"
215
+
201
216
  # Default: rich logs (group repeated timestamps).
202
217
  if fmt == "rich":
203
218
  handler: logging.Handler = RichHandler(
@@ -422,15 +437,54 @@ def _check_missing_required_inputs(input_schema: dict, provided_params: dict) ->
422
437
  return missing
423
438
 
424
439
 
440
+ def _print_available_tasks(
441
+ console: Console,
442
+ *,
443
+ workflow_filename: str,
444
+ tasks: list[str],
445
+ show_example_when_empty: bool,
446
+ ) -> None:
447
+ console.print("\n[cyan]Available tasks:[/cyan]")
448
+ for task_name in tasks:
449
+ console.print(f" [bold]{task_name}[/bold]")
450
+
451
+ if tasks:
452
+ example_task = tasks[0]
453
+ elif show_example_when_empty:
454
+ example_task = "run"
455
+ else:
456
+ return
457
+
458
+ console.print(
459
+ "\n[dim]Run a task explicitly, e.g.:[/dim] "
460
+ f"[bold]tactus {workflow_filename} {example_task}[/bold]"
461
+ )
462
+
463
+
464
+ def _print_waiting_for_human(console: Console, *, pending_message_id: str | None) -> None:
465
+ console.print("\n[yellow]Procedure paused - waiting for human response[/yellow]")
466
+ if pending_message_id:
467
+ console.print(f"[dim]Message ID: {pending_message_id}[/dim]")
468
+
469
+ console.print("\n[cyan]The procedure has been paused and is waiting for input.[/cyan]")
470
+ console.print(
471
+ "[cyan]To resume, run the procedure again or provide a response via another channel.[/cyan]\n"
472
+ )
473
+
474
+
425
475
  @app.command()
426
476
  def run(
427
477
  workflow_file: Path = typer.Argument(..., help="Path to workflow file (.tac)"),
478
+ task: Optional[str] = typer.Argument(
479
+ None, help="Optional task name (e.g., run, index, fetch:NOAA)"
480
+ ),
428
481
  storage: str = typer.Option("memory", help="Storage backend: memory, file"),
429
482
  storage_path: Optional[Path] = typer.Option(None, help="Path for file storage"),
430
483
  openai_api_key: Optional[str] = typer.Option(
431
484
  None, envvar="OPENAI_API_KEY", help="OpenAI API key"
432
485
  ),
433
486
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
487
+ debug: bool = typer.Option(False, "--debug", help="Enable debug logging + context tracing"),
434
488
  log_level: Optional[str] = typer.Option(
435
489
  None, "--log-level", help="Log level: debug, info, warning, error, critical"
436
490
  ),
@@ -499,7 +553,15 @@ def run(
499
553
  # Use real implementation for specific tools while mocking others
500
554
  tactus run workflow.tac --mock-all --real done
501
555
  """
502
- setup_logging(verbose=verbose, log_level=log_level, log_format=log_format)
556
+ setup_logging(verbose=verbose, log_level=log_level, log_format=log_format, debug=debug)
557
+ import warnings
558
+
559
+ warnings.filterwarnings(
560
+ "ignore",
561
+ message="Pydantic serializer warnings:",
562
+ category=UserWarning,
563
+ module="pydantic.main",
564
+ )
503
565
 
504
566
  # Check if file exists
505
567
  if not workflow_file.exists():
@@ -637,6 +699,17 @@ def run(
637
699
  if sandbox_broker_host is not None:
638
700
  sandbox_config_dict["broker_host"] = sandbox_broker_host
639
701
 
702
+ if "dev_mode" not in sandbox_config_dict:
703
+ try:
704
+ import tactus
705
+
706
+ tactus_module_path = Path(tactus.__file__).resolve()
707
+ repo_root = tactus_module_path.parent.parent
708
+ if (repo_root / "tactus").is_dir() and (repo_root / "pyproject.toml").exists():
709
+ sandbox_config_dict["dev_mode"] = True
710
+ except Exception:
711
+ pass
712
+
640
713
  sandbox_config_dict["broker_transport"] = sandbox_broker
641
714
  if (
642
715
  sandbox_network is None
@@ -649,9 +722,12 @@ def run(
649
722
 
650
723
  # Pass logging preferences through to the sandbox container so container stderr matches CLI UX.
651
724
  sandbox_config.env.setdefault(
652
- "TACTUS_LOG_LEVEL", str(log_level or ("debug" if verbose else "info"))
725
+ "TACTUS_LOG_LEVEL", str(log_level or ("debug" if (verbose or debug) else "info"))
653
726
  )
654
727
  sandbox_config.env.setdefault("TACTUS_LOG_FORMAT", str(log_format))
728
+ if debug:
729
+ sandbox_config.env.setdefault("TACTUS_TRACE_LLM_MESSAGES", "1")
730
+ sandbox_config.env.setdefault("TACTUS_TRACE_CONTEXT", "1")
655
731
 
656
732
  # Check Docker availability
657
733
  docker_available, docker_reason = is_docker_available()
@@ -733,14 +809,15 @@ def run(
733
809
  source_file_path=str(workflow_file),
734
810
  )
735
811
 
736
- # Set up mocking based on CLI flags
737
- if mock_all or real_all or mock or real:
738
- from tactus.core.mocking import MockManager
812
+ # Always create a mock manager so Mocks {} blocks can register tool mocks.
813
+ from tactus.core.mocking import MockManager, set_current_mock_manager
739
814
 
740
- # Create and configure mock manager
741
- mock_manager = MockManager()
742
- runtime.mock_manager = mock_manager
815
+ mock_manager = MockManager()
816
+ runtime.mock_manager = mock_manager
817
+ set_current_mock_manager(mock_manager)
743
818
 
819
+ # Set up mocking based on CLI flags
820
+ if mock_all or real_all or mock or real:
744
821
  # Handle global flags
745
822
  if mock_all:
746
823
  mock_manager.enable_mock()
@@ -801,6 +878,7 @@ def run(
801
878
  params=context,
802
879
  source_file_path=str(workflow_file),
803
880
  format=file_format,
881
+ task_name=task,
804
882
  )
805
883
  )
806
884
 
@@ -814,6 +892,25 @@ def run(
814
892
  "tools_used": sandbox_result.metadata.get("tools_used", []),
815
893
  }
816
894
  else:
895
+ # Special-case: sandbox task selection (avoid a scary error dump).
896
+ if sandbox_result.error_type == "TaskSelectionRequired":
897
+ tasks = sandbox_result.metadata.get("tasks", [])
898
+ _print_available_tasks(
899
+ console,
900
+ workflow_filename=workflow_file.name,
901
+ tasks=tasks,
902
+ show_example_when_empty=False,
903
+ )
904
+ return
905
+
906
+ # Special-case: sandbox waiting for human (treat as a pause, not a failure).
907
+ if sandbox_result.metadata.get("waiting_for_human"):
908
+ _print_waiting_for_human(
909
+ console,
910
+ pending_message_id=sandbox_result.metadata.get("pending_message_id"),
911
+ )
912
+ return
913
+
817
914
  result = {
818
915
  "success": False,
819
916
  "error": sandbox_result.error,
@@ -823,42 +920,29 @@ def run(
823
920
  else:
824
921
  # Execute directly (non-sandboxed)
825
922
  try:
826
- result = asyncio.run(runtime.execute(source_content, context, format=file_format))
827
- except Exception as e:
828
- from tactus.core.exceptions import ProcedureWaitingForHuman
829
-
830
- # Check both the exception itself and its __cause__
831
- console.print(f"[dim]DEBUG: Caught exception type: {type(e).__name__}[/dim]")
832
- console.print(
833
- f"[dim]DEBUG: Exception __cause__ type: {type(e.__cause__).__name__ if e.__cause__ else 'None'}[/dim]"
834
- )
835
- console.print(
836
- f"[dim]DEBUG: Is ProcedureWaitingForHuman: {isinstance(e, ProcedureWaitingForHuman)}[/dim]"
837
- )
838
- console.print(
839
- f"[dim]DEBUG: __cause__ is ProcedureWaitingForHuman: {isinstance(e.__cause__, ProcedureWaitingForHuman) if e.__cause__ else False}[/dim]"
923
+ result = asyncio.run(
924
+ runtime.execute(source_content, context, format=file_format, task_name=task)
840
925
  )
841
-
926
+ except Exception as e:
927
+ from tactus.core.exceptions import ProcedureWaitingForHuman, TaskSelectionRequired
928
+
929
+ task_error = e.__cause__ if isinstance(e.__cause__, TaskSelectionRequired) else e
930
+ if isinstance(task_error, TaskSelectionRequired):
931
+ _print_available_tasks(
932
+ console,
933
+ workflow_filename=workflow_file.name,
934
+ tasks=task_error.tasks,
935
+ show_example_when_empty=True,
936
+ )
937
+ return
842
938
  if isinstance(e, ProcedureWaitingForHuman):
843
939
  # Direct exception
844
- console.print(
845
- "\n[yellow]⏸ Procedure paused - waiting for human response[/yellow]"
846
- )
847
- console.print(f"[dim]Message ID: {e.pending_message_id}[/dim]")
848
- console.print("\n[cyan]The procedure has been paused and is waiting for input.")
849
- console.print(
850
- "To resume, run the procedure again or provide a response via another channel.[/cyan]\n"
851
- )
940
+ _print_waiting_for_human(console, pending_message_id=e.pending_message_id)
852
941
  return
853
942
  elif e.__cause__ and isinstance(e.__cause__, ProcedureWaitingForHuman):
854
943
  # Wrapped exception
855
- console.print(
856
- "\n[yellow]⏸ Procedure paused - waiting for human response[/yellow]"
857
- )
858
- console.print(f"[dim]Message ID: {e.__cause__.pending_message_id}[/dim]")
859
- console.print("\n[cyan]The procedure has been paused and is waiting for input.")
860
- console.print(
861
- "To resume, run the procedure again or provide a response via another channel.[/cyan]\n"
944
+ _print_waiting_for_human(
945
+ console, pending_message_id=e.__cause__.pending_message_id
862
946
  )
863
947
  return
864
948
  else:
@@ -910,6 +994,24 @@ def run(
910
994
  if verbose:
911
995
  console.print_exception()
912
996
  raise typer.Exit(1)
997
+ finally:
998
+ try:
999
+ import litellm
1000
+
1001
+ close_clients = getattr(litellm, "close_litellm_async_clients", None)
1002
+ if close_clients:
1003
+ close_result = close_clients()
1004
+ if asyncio.iscoroutine(close_result):
1005
+ asyncio.run(close_result)
1006
+ except Exception:
1007
+ pass
1008
+ try:
1009
+ asyncio.run(control_handler.shutdown_channels())
1010
+ except RuntimeError:
1011
+ # Best-effort cleanup if an event loop is already running.
1012
+ pass
1013
+ except Exception:
1014
+ pass
913
1015
 
914
1016
 
915
1017
  # Sandbox subcommand group
@@ -1264,16 +1366,13 @@ def info(
1264
1366
  # Determine format based on extension
1265
1367
  file_format = "lua" if workflow_file.suffix in [".tac", ".lua"] else "yaml"
1266
1368
 
1267
- # Read workflow file
1268
- source_content = workflow_file.read_text()
1269
-
1270
1369
  console.print(f"[blue]Procedure info:[/blue] [bold]{workflow_file.name}[/bold]\n")
1271
1370
 
1272
1371
  try:
1273
1372
  if file_format == "lua":
1274
1373
  # Use validator to parse procedure
1275
1374
  validator = TactusValidator()
1276
- result = validator.validate(source_content, ValidationMode.FULL)
1375
+ result = validator.validate_file(str(workflow_file), ValidationMode.FULL)
1277
1376
 
1278
1377
  if not result.valid:
1279
1378
  console.print("[red]✗ Invalid procedure - cannot display info[/red]\n")
@@ -1327,6 +1426,48 @@ def info(
1327
1426
  console.print(f" [bold]{name}[/bold]: {type(field_config).__name__}")
1328
1427
  console.print()
1329
1428
 
1429
+ # Show tasks (including nested tasks + implicit retriever tasks)
1430
+ implicit_task_targets: dict[str, list[str]] = {}
1431
+ if registry.retrievers:
1432
+ from tactus.core.retriever_tasks import (
1433
+ resolve_retriever_id,
1434
+ supported_retriever_tasks,
1435
+ )
1436
+
1437
+ for retriever_name, retriever in registry.retrievers.items():
1438
+ config = getattr(retriever, "config", {})
1439
+ retriever_id = resolve_retriever_id(config if isinstance(config, dict) else {})
1440
+ for task_name in sorted(supported_retriever_tasks(retriever_id)):
1441
+ implicit_task_targets.setdefault(task_name, []).append(retriever_name)
1442
+
1443
+ if registry.tasks or implicit_task_targets:
1444
+ console.print("[cyan]Tasks:[/cyan]")
1445
+
1446
+ def _emit_tasks(task_map: dict, prefix: str = "") -> None:
1447
+ for task_name, task in task_map.items():
1448
+ full_name = (
1449
+ f"{prefix}{task_name}" if not prefix else f"{prefix}:{task_name}"
1450
+ )
1451
+ console.print(f" [bold]{full_name}[/bold]")
1452
+ if task.children:
1453
+ _emit_tasks(task.children, full_name)
1454
+
1455
+ if registry.tasks:
1456
+ _emit_tasks(registry.tasks)
1457
+
1458
+ for task_name, retriever_names in implicit_task_targets.items():
1459
+ if task_name in registry.tasks:
1460
+ continue
1461
+ console.print(
1462
+ f" [bold]{task_name}[/bold] [dim](implicit from retrievers)[/dim]"
1463
+ )
1464
+ for retriever_name in retriever_names:
1465
+ console.print(
1466
+ f" [bold]{task_name}:{retriever_name}[/bold] [dim](implicit)[/dim]"
1467
+ )
1468
+
1469
+ console.print()
1470
+
1330
1471
  # Show agents
1331
1472
  if registry.agents:
1332
1473
  console.print("[cyan]Agents:[/cyan]")
@@ -1380,6 +1521,7 @@ def test(
1380
1521
  mock_config: Optional[Path] = typer.Option(None, help="Path to mock config JSON"),
1381
1522
  param: Optional[list[str]] = typer.Option(None, help="Parameters in format key=value"),
1382
1523
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
1524
+ debug: bool = typer.Option(False, "--debug", help="Enable debug logging + context tracing"),
1383
1525
  ):
1384
1526
  """
1385
1527
  Run BDD specifications for a procedure.
@@ -1400,7 +1542,7 @@ def test(
1400
1542
  # Run specific scenario
1401
1543
  tactus test procedure.tac --scenario "Agent completes research"
1402
1544
  """
1403
- setup_logging(verbose)
1545
+ setup_logging(verbose=verbose, debug=debug)
1404
1546
 
1405
1547
  if not procedure_file.exists():
1406
1548
  console.print(f"[red]Error:[/red] File not found: {procedure_file}")
@@ -1489,7 +1631,10 @@ def test(
1489
1631
  if runs > 1:
1490
1632
  # Run consistency evaluation
1491
1633
  evaluator = TactusEvaluationRunner(
1492
- procedure_file, mock_tools=mock_tools, params=test_params
1634
+ procedure_file,
1635
+ mock_tools=mock_tools,
1636
+ params=test_params,
1637
+ mocked=bool(mock or mock_config),
1493
1638
  )
1494
1639
  evaluator.setup(
1495
1640
  result.registry.gherkin_specifications,
@@ -1506,7 +1651,12 @@ def test(
1506
1651
 
1507
1652
  else:
1508
1653
  # Run standard test
1509
- runner = TactusTestRunner(procedure_file, mock_tools=mock_tools, params=test_params)
1654
+ runner = TactusTestRunner(
1655
+ procedure_file,
1656
+ mock_tools=mock_tools,
1657
+ params=test_params,
1658
+ mocked=bool(mock or mock_config),
1659
+ )
1510
1660
  runner.setup(
1511
1661
  result.registry.gherkin_specifications,
1512
1662
  custom_steps_dict=result.registry.custom_steps,
@@ -1540,13 +1690,13 @@ def _display_test_results(test_result):
1540
1690
  # Include execution metrics in scenario display
1541
1691
  metrics_parts = []
1542
1692
  if scenario.total_cost > 0:
1543
- metrics_parts.append(f"💰 ${scenario.total_cost:.6f}")
1693
+ metrics_parts.append(f"$ {scenario.total_cost:.6f}")
1544
1694
  if scenario.llm_calls > 0:
1545
- metrics_parts.append(f"🤖 {scenario.llm_calls} LLM calls")
1695
+ metrics_parts.append(f"{scenario.llm_calls} LLM calls")
1546
1696
  if scenario.iterations > 0:
1547
- metrics_parts.append(f"🔄 {scenario.iterations} iterations")
1697
+ metrics_parts.append(f"{scenario.iterations} iterations")
1548
1698
  if scenario.tools_used:
1549
- metrics_parts.append(f"🔧 {len(scenario.tools_used)} tools")
1699
+ metrics_parts.append(f"{len(scenario.tools_used)} tools")
1550
1700
 
1551
1701
  metrics_str = f" ({', '.join(metrics_parts)})" if metrics_parts else ""
1552
1702
  console.print(
@@ -1573,14 +1723,14 @@ def _display_test_results(test_result):
1573
1723
  console.print("\n[bold]Execution Metrics:[/bold]")
1574
1724
  if test_result.total_cost > 0:
1575
1725
  console.print(
1576
- f" 💰 Cost: ${test_result.total_cost:.6f} ({test_result.total_tokens:,} tokens)"
1726
+ f" $ Cost: ${test_result.total_cost:.6f} ({test_result.total_tokens:,} tokens)"
1577
1727
  )
1578
1728
  if test_result.total_llm_calls > 0:
1579
- console.print(f" 🤖 LLM Calls: {test_result.total_llm_calls}")
1729
+ console.print(f" LLM Calls: {test_result.total_llm_calls}")
1580
1730
  if test_result.total_iterations > 0:
1581
- console.print(f" 🔄 Iterations: {test_result.total_iterations}")
1731
+ console.print(f" Iterations: {test_result.total_iterations}")
1582
1732
  if test_result.unique_tools_used:
1583
- console.print(f" 🔧 Tools: {', '.join(test_result.unique_tools_used)}")
1733
+ console.print(f" Tools: {', '.join(test_result.unique_tools_used)}")
1584
1734
 
1585
1735
 
1586
1736
  def _display_evaluation_results(eval_results):
@@ -1609,7 +1759,7 @@ def _display_evaluation_results(eval_results):
1609
1759
 
1610
1760
  # Flakiness warning
1611
1761
  if eval_result.is_flaky:
1612
- console.print(" [yellow]⚠️ FLAKY - Inconsistent results detected[/yellow]")
1762
+ console.print(" [yellow]! FLAKY - Inconsistent results detected[/yellow]")
1613
1763
 
1614
1764
 
1615
1765
  def _display_eval_results(report, runs: int, console):
@@ -1722,6 +1872,7 @@ def eval(
1722
1872
  runs: int = typer.Option(1, help="Number of runs per case"),
1723
1873
  parallel: bool = typer.Option(True, help="Run cases in parallel"),
1724
1874
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
1875
+ debug: bool = typer.Option(False, "--debug", help="Enable debug logging + context tracing"),
1725
1876
  ):
1726
1877
  """
1727
1878
  Run Pydantic Evals evaluation on procedure.
@@ -1741,7 +1892,7 @@ def eval(
1741
1892
  # Run sequentially (for debugging)
1742
1893
  tactus eval procedure.tac --no-parallel
1743
1894
  """
1744
- setup_logging(verbose)
1895
+ setup_logging(verbose=verbose, debug=debug)
1745
1896
  load_tactus_config()
1746
1897
 
1747
1898
  if not procedure_file.exists():
@@ -2309,10 +2460,14 @@ def stdlib_test(
2309
2460
  tactus stdlib test classify # Run only classify tests
2310
2461
  tactus stdlib test extract # Run only extract tests
2311
2462
  """
2463
+ import os
2312
2464
  import tactus
2313
2465
  from tactus.validation import TactusValidator
2314
2466
  from tactus.testing.test_runner import TactusTestRunner
2315
2467
 
2468
+ # Force deterministic mocks for stdlib tests (CI-safe, offline).
2469
+ os.environ["TACTUS_MOCK_MODE"] = "1"
2470
+
2316
2471
  # Find stdlib spec files
2317
2472
  package_root = Path(tactus.__file__).parent
2318
2473
  stdlib_tac_path = package_root / "stdlib" / "tac" / "tactus"
@@ -2362,7 +2517,7 @@ def stdlib_test(
2362
2517
 
2363
2518
  # Run tests
2364
2519
  try:
2365
- runner = TactusTestRunner(spec_file, mock_tools={}, params={})
2520
+ runner = TactusTestRunner(spec_file, mock_tools={}, params={}, mocked=True)
2366
2521
  runner.setup(
2367
2522
  result.registry.gherkin_specifications,
2368
2523
  custom_steps_dict=result.registry.custom_steps,
tactus/core/compaction.py ADDED
@@ -0,0 +1,17 @@
1
+ """Re-export Biblicus Context Engine compaction utilities for Tactus."""
2
+
3
+ from biblicus.context_engine import (
4
+ BaseCompactor,
5
+ CompactionRequest,
6
+ SummaryCompactor,
7
+ TruncateCompactor,
8
+ )
9
+ from biblicus.context_engine.compaction import build_compactor
10
+
11
+ __all__ = [
12
+ "BaseCompactor",
13
+ "CompactionRequest",
14
+ "SummaryCompactor",
15
+ "TruncateCompactor",
16
+ "build_compactor",
17
+ ]
tactus/core/context_assembler.py ADDED
@@ -0,0 +1,73 @@
1
+ """Tactus wrapper for the Biblicus Context Engine assembler."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from biblicus.context_engine import ContextAssembler as BiblicusContextAssembler
6
+ from biblicus.context_engine import ContextAssemblyResult
7
+ from typing import Any, Optional
8
+ import logging
9
+ import os
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class ContextAssembler(BiblicusContextAssembler):
16
+ """
17
+ Context assembler that defaults to Tactus test retrievers.
18
+
19
+ :param default_retriever: Optional default retriever override.
20
+ :type default_retriever: callable or None
21
+ """
22
+
23
+ def __init__(
24
+ self,
25
+ context_registry,
26
+ retriever_registry: Optional[dict[str, Any]] = None,
27
+ corpus_registry: Optional[dict[str, Any]] = None,
28
+ compactor_registry: Optional[dict[str, Any]] = None,
29
+ default_retriever: Optional[Any] = None,
30
+ ):
31
+ from tactus.core import retrieval as retrieval_module
32
+
33
+ retriever_router = retrieval_module.make_retriever_router(
34
+ corpus_registry, retriever_registry
35
+ )
36
+ super().__init__(
37
+ context_registry,
38
+ retriever_registry=retriever_registry,
39
+ corpus_registry=corpus_registry,
40
+ compactor_registry=compactor_registry,
41
+ default_retriever=default_retriever or retriever_router,
42
+ )
43
+
44
+ def assemble(
45
+ self,
46
+ context_name: str,
47
+ base_system_prompt: str,
48
+ history_messages: list[dict[str, Any]],
49
+ user_message: Optional[str],
50
+ template_context: dict[str, Any],
51
+ retriever_override: Optional[Any] = None,
52
+ ):
53
+ result = super().assemble(
54
+ context_name=context_name,
55
+ base_system_prompt=base_system_prompt,
56
+ history_messages=history_messages,
57
+ user_message=user_message,
58
+ template_context=template_context,
59
+ retriever_override=retriever_override,
60
+ )
61
+ if os.environ.get("TACTUS_TRACE_CONTEXT") == "1":
62
+ logger.debug(
63
+ "[CONTEXT] name=%s system_chars=%s history_items=%s user_chars=%s token_est=%s",
64
+ context_name,
65
+ len(result.system_prompt),
66
+ len(result.history),
67
+ len(result.user_message),
68
+ result.token_count,
69
+ )
70
+ return result
71
+
72
+
73
+ __all__ = ["ContextAssembler", "ContextAssemblyResult"]
tactus/core/context_models.py ADDED
@@ -0,0 +1,41 @@
1
+ """Re-export Biblicus Context Engine models for Tactus."""
2
+
3
+ from biblicus.context_engine import (
4
+ AssistantMessageSpec,
5
+ CompactorDeclaration,
6
+ ContextBudgetSpec,
7
+ ContextDeclaration,
8
+ ContextExpansionSpec,
9
+ ContextInsertSpec,
10
+ ContextMessageSpec,
11
+ ContextPackBudgetSpec,
12
+ ContextPackSpec,
13
+ ContextPolicySpec,
14
+ ContextRetrieverRequest,
15
+ ContextTemplateSpec,
16
+ CorpusDeclaration,
17
+ HistoryInsertSpec,
18
+ RetrieverDeclaration,
19
+ SystemMessageSpec,
20
+ UserMessageSpec,
21
+ )
22
+
23
+ __all__ = [
24
+ "AssistantMessageSpec",
25
+ "CompactorDeclaration",
26
+ "ContextBudgetSpec",
27
+ "ContextDeclaration",
28
+ "ContextExpansionSpec",
29
+ "ContextInsertSpec",
30
+ "ContextMessageSpec",
31
+ "ContextPackBudgetSpec",
32
+ "ContextPackSpec",
33
+ "ContextPolicySpec",
34
+ "ContextRetrieverRequest",
35
+ "ContextTemplateSpec",
36
+ "CorpusDeclaration",
37
+ "HistoryInsertSpec",
38
+ "RetrieverDeclaration",
39
+ "SystemMessageSpec",
40
+ "UserMessageSpec",
41
+ ]