tactus-0.37.0-py3-none-any.whl → tactus-0.39.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/channels/base.py +2 -0
- tactus/cli/app.py +212 -57
- tactus/core/compaction.py +17 -0
- tactus/core/context_assembler.py +73 -0
- tactus/core/context_models.py +41 -0
- tactus/core/dsl_stubs.py +568 -17
- tactus/core/exceptions.py +8 -0
- tactus/core/execution_context.py +1 -1
- tactus/core/mocking.py +12 -0
- tactus/core/registry.py +142 -0
- tactus/core/retrieval.py +317 -0
- tactus/core/retriever_tasks.py +30 -0
- tactus/core/runtime.py +441 -75
- tactus/dspy/agent.py +143 -82
- tactus/dspy/config.py +16 -0
- tactus/dspy/module.py +12 -1
- tactus/ide/coding_assistant.py +2 -2
- tactus/plugins/__init__.py +3 -0
- tactus/plugins/noaa.py +76 -0
- tactus/primitives/handles.py +79 -7
- tactus/sandbox/config.py +1 -1
- tactus/sandbox/container_runner.py +2 -0
- tactus/sandbox/entrypoint.py +51 -8
- tactus/sandbox/protocol.py +5 -0
- tactus/stdlib/README.md +10 -1
- tactus/stdlib/biblicus/__init__.py +3 -0
- tactus/stdlib/biblicus/text.py +208 -0
- tactus/stdlib/tac/biblicus/text.tac +32 -0
- tactus/stdlib/tac/tactus/biblicus.spec.tac +179 -0
- tactus/stdlib/tac/tactus/corpora/base.tac +42 -0
- tactus/stdlib/tac/tactus/corpora/filesystem.tac +5 -0
- tactus/stdlib/tac/tactus/retrievers/base.tac +37 -0
- tactus/stdlib/tac/tactus/retrievers/embedding_index_file.tac +6 -0
- tactus/stdlib/tac/tactus/retrievers/embedding_index_inmemory.tac +6 -0
- tactus/stdlib/tac/tactus/retrievers/index.md +137 -0
- tactus/stdlib/tac/tactus/retrievers/init.tac +11 -0
- tactus/stdlib/tac/tactus/retrievers/sqlite_full_text_search.tac +6 -0
- tactus/stdlib/tac/tactus/retrievers/tf_vector.tac +6 -0
- tactus/testing/behave_integration.py +2 -0
- tactus/testing/context.py +4 -0
- tactus/validation/semantic_visitor.py +430 -88
- tactus/validation/validator.py +142 -2
- {tactus-0.37.0.dist-info → tactus-0.39.0.dist-info}/METADATA +3 -2
- {tactus-0.37.0.dist-info → tactus-0.39.0.dist-info}/RECORD +48 -28
- {tactus-0.37.0.dist-info → tactus-0.39.0.dist-info}/WHEEL +0 -0
- {tactus-0.37.0.dist-info → tactus-0.39.0.dist-info}/entry_points.txt +0 -0
- {tactus-0.37.0.dist-info → tactus-0.39.0.dist-info}/licenses/LICENSE +0 -0
tactus/__init__.py
CHANGED
tactus/adapters/channels/base.py
CHANGED
|
@@ -198,4 +198,6 @@ class InProcessChannel(ABC):
|
|
|
198
198
|
"""
|
|
199
199
|
if self._response_queue is None:
|
|
200
200
|
loop.call_soon_threadsafe(self._ensure_asyncio_primitives)
|
|
201
|
+
loop.call_soon_threadsafe(lambda: self._response_queue.put_nowait(response))
|
|
202
|
+
return
|
|
201
203
|
loop.call_soon_threadsafe(self._response_queue.put_nowait, response)
|
tactus/cli/app.py
CHANGED
|
@@ -5,6 +5,9 @@ Main entry point for the Tactus command-line interface.
|
|
|
5
5
|
Provides commands for running, validating, and testing workflows.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
# Future annotations prevent runtime evaluation of PEP 604 unions on Python 3.9.
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
8
11
|
# Disable Pydantic plugins for PyInstaller builds
|
|
9
12
|
# This prevents logfire (and other plugins) from being loaded via Pydantic's plugin system
|
|
10
13
|
# which causes errors when trying to inspect source code in frozen apps
|
|
@@ -61,6 +64,13 @@ def main_callback(
|
|
|
61
64
|
|
|
62
65
|
# If no subcommand was invoked and version flag not set, show help
|
|
63
66
|
if ctx.invoked_subcommand is None:
|
|
67
|
+
if getattr(ctx, "args", None) and ctx.args[0].endswith((".tac", ".lua")):
|
|
68
|
+
workflow_file = Path(ctx.args[0])
|
|
69
|
+
task_name = None
|
|
70
|
+
if len(ctx.args) >= 2 and not ctx.args[1].startswith("-"):
|
|
71
|
+
task_name = ctx.args[1]
|
|
72
|
+
ctx.invoke(run, workflow_file=workflow_file, task=task_name)
|
|
73
|
+
raise typer.Exit()
|
|
64
74
|
console.print(ctx.get_help())
|
|
65
75
|
raise typer.Exit()
|
|
66
76
|
|
|
@@ -179,10 +189,11 @@ def setup_logging(
|
|
|
179
189
|
verbose: bool = False,
|
|
180
190
|
log_level: Optional[str] = None,
|
|
181
191
|
log_format: str = "rich",
|
|
192
|
+
debug: bool = False,
|
|
182
193
|
) -> None:
|
|
183
194
|
"""Setup CLI logging (level + format)."""
|
|
184
195
|
if log_level is None:
|
|
185
|
-
level = logging.DEBUG if verbose else logging.INFO
|
|
196
|
+
level = logging.DEBUG if (verbose or debug) else logging.INFO
|
|
186
197
|
else:
|
|
187
198
|
key = str(log_level).strip().lower()
|
|
188
199
|
if key not in _LOG_LEVELS:
|
|
@@ -198,6 +209,10 @@ def setup_logging(
|
|
|
198
209
|
f"Invalid --log-format '{log_format}'. Use one of: {', '.join(sorted(_LOG_FORMATS))}"
|
|
199
210
|
)
|
|
200
211
|
|
|
212
|
+
if debug:
|
|
213
|
+
os.environ["TACTUS_TRACE_LLM_MESSAGES"] = "1"
|
|
214
|
+
os.environ["TACTUS_TRACE_CONTEXT"] = "1"
|
|
215
|
+
|
|
201
216
|
# Default: rich logs (group repeated timestamps).
|
|
202
217
|
if fmt == "rich":
|
|
203
218
|
handler: logging.Handler = RichHandler(
|
|
@@ -422,15 +437,54 @@ def _check_missing_required_inputs(input_schema: dict, provided_params: dict) ->
|
|
|
422
437
|
return missing
|
|
423
438
|
|
|
424
439
|
|
|
440
|
+
def _print_available_tasks(
|
|
441
|
+
console: Console,
|
|
442
|
+
*,
|
|
443
|
+
workflow_filename: str,
|
|
444
|
+
tasks: list[str],
|
|
445
|
+
show_example_when_empty: bool,
|
|
446
|
+
) -> None:
|
|
447
|
+
console.print("\n[cyan]Available tasks:[/cyan]")
|
|
448
|
+
for task_name in tasks:
|
|
449
|
+
console.print(f" [bold]{task_name}[/bold]")
|
|
450
|
+
|
|
451
|
+
if tasks:
|
|
452
|
+
example_task = tasks[0]
|
|
453
|
+
elif show_example_when_empty:
|
|
454
|
+
example_task = "run"
|
|
455
|
+
else:
|
|
456
|
+
return
|
|
457
|
+
|
|
458
|
+
console.print(
|
|
459
|
+
"\n[dim]Run a task explicitly, e.g.:[/dim] "
|
|
460
|
+
f"[bold]tactus {workflow_filename} {example_task}[/bold]"
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def _print_waiting_for_human(console: Console, *, pending_message_id: str | None) -> None:
|
|
465
|
+
console.print("\n[yellow]Procedure paused - waiting for human response[/yellow]")
|
|
466
|
+
if pending_message_id:
|
|
467
|
+
console.print(f"[dim]Message ID: {pending_message_id}[/dim]")
|
|
468
|
+
|
|
469
|
+
console.print("\n[cyan]The procedure has been paused and is waiting for input.[/cyan]")
|
|
470
|
+
console.print(
|
|
471
|
+
"[cyan]To resume, run the procedure again or provide a response via another channel.[/cyan]\n"
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
|
|
425
475
|
@app.command()
|
|
426
476
|
def run(
|
|
427
477
|
workflow_file: Path = typer.Argument(..., help="Path to workflow file (.tac)"),
|
|
478
|
+
task: Optional[str] = typer.Argument(
|
|
479
|
+
None, help="Optional task name (e.g., run, index, fetch:NOAA)"
|
|
480
|
+
),
|
|
428
481
|
storage: str = typer.Option("memory", help="Storage backend: memory, file"),
|
|
429
482
|
storage_path: Optional[Path] = typer.Option(None, help="Path for file storage"),
|
|
430
483
|
openai_api_key: Optional[str] = typer.Option(
|
|
431
484
|
None, envvar="OPENAI_API_KEY", help="OpenAI API key"
|
|
432
485
|
),
|
|
433
486
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
|
|
487
|
+
debug: bool = typer.Option(False, "--debug", help="Enable debug logging + context tracing"),
|
|
434
488
|
log_level: Optional[str] = typer.Option(
|
|
435
489
|
None, "--log-level", help="Log level: debug, info, warning, error, critical"
|
|
436
490
|
),
|
|
@@ -499,7 +553,15 @@ def run(
|
|
|
499
553
|
# Use real implementation for specific tools while mocking others
|
|
500
554
|
tactus run workflow.tac --mock-all --real done
|
|
501
555
|
"""
|
|
502
|
-
setup_logging(verbose=verbose, log_level=log_level, log_format=log_format)
|
|
556
|
+
setup_logging(verbose=verbose, log_level=log_level, log_format=log_format, debug=debug)
|
|
557
|
+
import warnings
|
|
558
|
+
|
|
559
|
+
warnings.filterwarnings(
|
|
560
|
+
"ignore",
|
|
561
|
+
message="Pydantic serializer warnings:",
|
|
562
|
+
category=UserWarning,
|
|
563
|
+
module="pydantic.main",
|
|
564
|
+
)
|
|
503
565
|
|
|
504
566
|
# Check if file exists
|
|
505
567
|
if not workflow_file.exists():
|
|
@@ -637,6 +699,17 @@ def run(
|
|
|
637
699
|
if sandbox_broker_host is not None:
|
|
638
700
|
sandbox_config_dict["broker_host"] = sandbox_broker_host
|
|
639
701
|
|
|
702
|
+
if "dev_mode" not in sandbox_config_dict:
|
|
703
|
+
try:
|
|
704
|
+
import tactus
|
|
705
|
+
|
|
706
|
+
tactus_module_path = Path(tactus.__file__).resolve()
|
|
707
|
+
repo_root = tactus_module_path.parent.parent
|
|
708
|
+
if (repo_root / "tactus").is_dir() and (repo_root / "pyproject.toml").exists():
|
|
709
|
+
sandbox_config_dict["dev_mode"] = True
|
|
710
|
+
except Exception:
|
|
711
|
+
pass
|
|
712
|
+
|
|
640
713
|
sandbox_config_dict["broker_transport"] = sandbox_broker
|
|
641
714
|
if (
|
|
642
715
|
sandbox_network is None
|
|
@@ -649,9 +722,12 @@ def run(
|
|
|
649
722
|
|
|
650
723
|
# Pass logging preferences through to the sandbox container so container stderr matches CLI UX.
|
|
651
724
|
sandbox_config.env.setdefault(
|
|
652
|
-
"TACTUS_LOG_LEVEL", str(log_level or ("debug" if verbose else "info"))
|
|
725
|
+
"TACTUS_LOG_LEVEL", str(log_level or ("debug" if (verbose or debug) else "info"))
|
|
653
726
|
)
|
|
654
727
|
sandbox_config.env.setdefault("TACTUS_LOG_FORMAT", str(log_format))
|
|
728
|
+
if debug:
|
|
729
|
+
sandbox_config.env.setdefault("TACTUS_TRACE_LLM_MESSAGES", "1")
|
|
730
|
+
sandbox_config.env.setdefault("TACTUS_TRACE_CONTEXT", "1")
|
|
655
731
|
|
|
656
732
|
# Check Docker availability
|
|
657
733
|
docker_available, docker_reason = is_docker_available()
|
|
@@ -733,14 +809,15 @@ def run(
|
|
|
733
809
|
source_file_path=str(workflow_file),
|
|
734
810
|
)
|
|
735
811
|
|
|
736
|
-
#
|
|
737
|
-
|
|
738
|
-
from tactus.core.mocking import MockManager
|
|
812
|
+
# Always create a mock manager so Mocks {} blocks can register tool mocks.
|
|
813
|
+
from tactus.core.mocking import MockManager, set_current_mock_manager
|
|
739
814
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
815
|
+
mock_manager = MockManager()
|
|
816
|
+
runtime.mock_manager = mock_manager
|
|
817
|
+
set_current_mock_manager(mock_manager)
|
|
743
818
|
|
|
819
|
+
# Set up mocking based on CLI flags
|
|
820
|
+
if mock_all or real_all or mock or real:
|
|
744
821
|
# Handle global flags
|
|
745
822
|
if mock_all:
|
|
746
823
|
mock_manager.enable_mock()
|
|
@@ -801,6 +878,7 @@ def run(
|
|
|
801
878
|
params=context,
|
|
802
879
|
source_file_path=str(workflow_file),
|
|
803
880
|
format=file_format,
|
|
881
|
+
task_name=task,
|
|
804
882
|
)
|
|
805
883
|
)
|
|
806
884
|
|
|
@@ -814,6 +892,25 @@ def run(
|
|
|
814
892
|
"tools_used": sandbox_result.metadata.get("tools_used", []),
|
|
815
893
|
}
|
|
816
894
|
else:
|
|
895
|
+
# Special-case: sandbox task selection (avoid a scary error dump).
|
|
896
|
+
if sandbox_result.error_type == "TaskSelectionRequired":
|
|
897
|
+
tasks = sandbox_result.metadata.get("tasks", [])
|
|
898
|
+
_print_available_tasks(
|
|
899
|
+
console,
|
|
900
|
+
workflow_filename=workflow_file.name,
|
|
901
|
+
tasks=tasks,
|
|
902
|
+
show_example_when_empty=False,
|
|
903
|
+
)
|
|
904
|
+
return
|
|
905
|
+
|
|
906
|
+
# Special-case: sandbox waiting for human (treat as a pause, not a failure).
|
|
907
|
+
if sandbox_result.metadata.get("waiting_for_human"):
|
|
908
|
+
_print_waiting_for_human(
|
|
909
|
+
console,
|
|
910
|
+
pending_message_id=sandbox_result.metadata.get("pending_message_id"),
|
|
911
|
+
)
|
|
912
|
+
return
|
|
913
|
+
|
|
817
914
|
result = {
|
|
818
915
|
"success": False,
|
|
819
916
|
"error": sandbox_result.error,
|
|
@@ -823,42 +920,29 @@ def run(
|
|
|
823
920
|
else:
|
|
824
921
|
# Execute directly (non-sandboxed)
|
|
825
922
|
try:
|
|
826
|
-
result = asyncio.run(
|
|
827
|
-
|
|
828
|
-
from tactus.core.exceptions import ProcedureWaitingForHuman
|
|
829
|
-
|
|
830
|
-
# Check both the exception itself and its __cause__
|
|
831
|
-
console.print(f"[dim]DEBUG: Caught exception type: {type(e).__name__}[/dim]")
|
|
832
|
-
console.print(
|
|
833
|
-
f"[dim]DEBUG: Exception __cause__ type: {type(e.__cause__).__name__ if e.__cause__ else 'None'}[/dim]"
|
|
834
|
-
)
|
|
835
|
-
console.print(
|
|
836
|
-
f"[dim]DEBUG: Is ProcedureWaitingForHuman: {isinstance(e, ProcedureWaitingForHuman)}[/dim]"
|
|
837
|
-
)
|
|
838
|
-
console.print(
|
|
839
|
-
f"[dim]DEBUG: __cause__ is ProcedureWaitingForHuman: {isinstance(e.__cause__, ProcedureWaitingForHuman) if e.__cause__ else False}[/dim]"
|
|
923
|
+
result = asyncio.run(
|
|
924
|
+
runtime.execute(source_content, context, format=file_format, task_name=task)
|
|
840
925
|
)
|
|
841
|
-
|
|
926
|
+
except Exception as e:
|
|
927
|
+
from tactus.core.exceptions import ProcedureWaitingForHuman, TaskSelectionRequired
|
|
928
|
+
|
|
929
|
+
task_error = e.__cause__ if isinstance(e.__cause__, TaskSelectionRequired) else e
|
|
930
|
+
if isinstance(task_error, TaskSelectionRequired):
|
|
931
|
+
_print_available_tasks(
|
|
932
|
+
console,
|
|
933
|
+
workflow_filename=workflow_file.name,
|
|
934
|
+
tasks=task_error.tasks,
|
|
935
|
+
show_example_when_empty=True,
|
|
936
|
+
)
|
|
937
|
+
return
|
|
842
938
|
if isinstance(e, ProcedureWaitingForHuman):
|
|
843
939
|
# Direct exception
|
|
844
|
-
console.
|
|
845
|
-
"\n[yellow]⏸ Procedure paused - waiting for human response[/yellow]"
|
|
846
|
-
)
|
|
847
|
-
console.print(f"[dim]Message ID: {e.pending_message_id}[/dim]")
|
|
848
|
-
console.print("\n[cyan]The procedure has been paused and is waiting for input.")
|
|
849
|
-
console.print(
|
|
850
|
-
"To resume, run the procedure again or provide a response via another channel.[/cyan]\n"
|
|
851
|
-
)
|
|
940
|
+
_print_waiting_for_human(console, pending_message_id=e.pending_message_id)
|
|
852
941
|
return
|
|
853
942
|
elif e.__cause__ and isinstance(e.__cause__, ProcedureWaitingForHuman):
|
|
854
943
|
# Wrapped exception
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
)
|
|
858
|
-
console.print(f"[dim]Message ID: {e.__cause__.pending_message_id}[/dim]")
|
|
859
|
-
console.print("\n[cyan]The procedure has been paused and is waiting for input.")
|
|
860
|
-
console.print(
|
|
861
|
-
"To resume, run the procedure again or provide a response via another channel.[/cyan]\n"
|
|
944
|
+
_print_waiting_for_human(
|
|
945
|
+
console, pending_message_id=e.__cause__.pending_message_id
|
|
862
946
|
)
|
|
863
947
|
return
|
|
864
948
|
else:
|
|
@@ -910,6 +994,24 @@ def run(
|
|
|
910
994
|
if verbose:
|
|
911
995
|
console.print_exception()
|
|
912
996
|
raise typer.Exit(1)
|
|
997
|
+
finally:
|
|
998
|
+
try:
|
|
999
|
+
import litellm
|
|
1000
|
+
|
|
1001
|
+
close_clients = getattr(litellm, "close_litellm_async_clients", None)
|
|
1002
|
+
if close_clients:
|
|
1003
|
+
close_result = close_clients()
|
|
1004
|
+
if asyncio.iscoroutine(close_result):
|
|
1005
|
+
asyncio.run(close_result)
|
|
1006
|
+
except Exception:
|
|
1007
|
+
pass
|
|
1008
|
+
try:
|
|
1009
|
+
asyncio.run(control_handler.shutdown_channels())
|
|
1010
|
+
except RuntimeError:
|
|
1011
|
+
# Best-effort cleanup if an event loop is already running.
|
|
1012
|
+
pass
|
|
1013
|
+
except Exception:
|
|
1014
|
+
pass
|
|
913
1015
|
|
|
914
1016
|
|
|
915
1017
|
# Sandbox subcommand group
|
|
@@ -1264,16 +1366,13 @@ def info(
|
|
|
1264
1366
|
# Determine format based on extension
|
|
1265
1367
|
file_format = "lua" if workflow_file.suffix in [".tac", ".lua"] else "yaml"
|
|
1266
1368
|
|
|
1267
|
-
# Read workflow file
|
|
1268
|
-
source_content = workflow_file.read_text()
|
|
1269
|
-
|
|
1270
1369
|
console.print(f"[blue]Procedure info:[/blue] [bold]{workflow_file.name}[/bold]\n")
|
|
1271
1370
|
|
|
1272
1371
|
try:
|
|
1273
1372
|
if file_format == "lua":
|
|
1274
1373
|
# Use validator to parse procedure
|
|
1275
1374
|
validator = TactusValidator()
|
|
1276
|
-
result = validator.
|
|
1375
|
+
result = validator.validate_file(str(workflow_file), ValidationMode.FULL)
|
|
1277
1376
|
|
|
1278
1377
|
if not result.valid:
|
|
1279
1378
|
console.print("[red]✗ Invalid procedure - cannot display info[/red]\n")
|
|
@@ -1327,6 +1426,48 @@ def info(
|
|
|
1327
1426
|
console.print(f" [bold]{name}[/bold]: {type(field_config).__name__}")
|
|
1328
1427
|
console.print()
|
|
1329
1428
|
|
|
1429
|
+
# Show tasks (including nested tasks + implicit retriever tasks)
|
|
1430
|
+
implicit_task_targets: dict[str, list[str]] = {}
|
|
1431
|
+
if registry.retrievers:
|
|
1432
|
+
from tactus.core.retriever_tasks import (
|
|
1433
|
+
resolve_retriever_id,
|
|
1434
|
+
supported_retriever_tasks,
|
|
1435
|
+
)
|
|
1436
|
+
|
|
1437
|
+
for retriever_name, retriever in registry.retrievers.items():
|
|
1438
|
+
config = getattr(retriever, "config", {})
|
|
1439
|
+
retriever_id = resolve_retriever_id(config if isinstance(config, dict) else {})
|
|
1440
|
+
for task_name in sorted(supported_retriever_tasks(retriever_id)):
|
|
1441
|
+
implicit_task_targets.setdefault(task_name, []).append(retriever_name)
|
|
1442
|
+
|
|
1443
|
+
if registry.tasks or implicit_task_targets:
|
|
1444
|
+
console.print("[cyan]Tasks:[/cyan]")
|
|
1445
|
+
|
|
1446
|
+
def _emit_tasks(task_map: dict, prefix: str = "") -> None:
|
|
1447
|
+
for task_name, task in task_map.items():
|
|
1448
|
+
full_name = (
|
|
1449
|
+
f"{prefix}{task_name}" if not prefix else f"{prefix}:{task_name}"
|
|
1450
|
+
)
|
|
1451
|
+
console.print(f" [bold]{full_name}[/bold]")
|
|
1452
|
+
if task.children:
|
|
1453
|
+
_emit_tasks(task.children, full_name)
|
|
1454
|
+
|
|
1455
|
+
if registry.tasks:
|
|
1456
|
+
_emit_tasks(registry.tasks)
|
|
1457
|
+
|
|
1458
|
+
for task_name, retriever_names in implicit_task_targets.items():
|
|
1459
|
+
if task_name in registry.tasks:
|
|
1460
|
+
continue
|
|
1461
|
+
console.print(
|
|
1462
|
+
f" [bold]{task_name}[/bold] [dim](implicit from retrievers)[/dim]"
|
|
1463
|
+
)
|
|
1464
|
+
for retriever_name in retriever_names:
|
|
1465
|
+
console.print(
|
|
1466
|
+
f" [bold]{task_name}:{retriever_name}[/bold] [dim](implicit)[/dim]"
|
|
1467
|
+
)
|
|
1468
|
+
|
|
1469
|
+
console.print()
|
|
1470
|
+
|
|
1330
1471
|
# Show agents
|
|
1331
1472
|
if registry.agents:
|
|
1332
1473
|
console.print("[cyan]Agents:[/cyan]")
|
|
@@ -1380,6 +1521,7 @@ def test(
|
|
|
1380
1521
|
mock_config: Optional[Path] = typer.Option(None, help="Path to mock config JSON"),
|
|
1381
1522
|
param: Optional[list[str]] = typer.Option(None, help="Parameters in format key=value"),
|
|
1382
1523
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
|
|
1524
|
+
debug: bool = typer.Option(False, "--debug", help="Enable debug logging + context tracing"),
|
|
1383
1525
|
):
|
|
1384
1526
|
"""
|
|
1385
1527
|
Run BDD specifications for a procedure.
|
|
@@ -1400,7 +1542,7 @@ def test(
|
|
|
1400
1542
|
# Run specific scenario
|
|
1401
1543
|
tactus test procedure.tac --scenario "Agent completes research"
|
|
1402
1544
|
"""
|
|
1403
|
-
setup_logging(verbose)
|
|
1545
|
+
setup_logging(verbose=verbose, debug=debug)
|
|
1404
1546
|
|
|
1405
1547
|
if not procedure_file.exists():
|
|
1406
1548
|
console.print(f"[red]Error:[/red] File not found: {procedure_file}")
|
|
@@ -1489,7 +1631,10 @@ def test(
|
|
|
1489
1631
|
if runs > 1:
|
|
1490
1632
|
# Run consistency evaluation
|
|
1491
1633
|
evaluator = TactusEvaluationRunner(
|
|
1492
|
-
procedure_file,
|
|
1634
|
+
procedure_file,
|
|
1635
|
+
mock_tools=mock_tools,
|
|
1636
|
+
params=test_params,
|
|
1637
|
+
mocked=bool(mock or mock_config),
|
|
1493
1638
|
)
|
|
1494
1639
|
evaluator.setup(
|
|
1495
1640
|
result.registry.gherkin_specifications,
|
|
@@ -1506,7 +1651,12 @@ def test(
|
|
|
1506
1651
|
|
|
1507
1652
|
else:
|
|
1508
1653
|
# Run standard test
|
|
1509
|
-
runner = TactusTestRunner(
|
|
1654
|
+
runner = TactusTestRunner(
|
|
1655
|
+
procedure_file,
|
|
1656
|
+
mock_tools=mock_tools,
|
|
1657
|
+
params=test_params,
|
|
1658
|
+
mocked=bool(mock or mock_config),
|
|
1659
|
+
)
|
|
1510
1660
|
runner.setup(
|
|
1511
1661
|
result.registry.gherkin_specifications,
|
|
1512
1662
|
custom_steps_dict=result.registry.custom_steps,
|
|
@@ -1540,13 +1690,13 @@ def _display_test_results(test_result):
|
|
|
1540
1690
|
# Include execution metrics in scenario display
|
|
1541
1691
|
metrics_parts = []
|
|
1542
1692
|
if scenario.total_cost > 0:
|
|
1543
|
-
metrics_parts.append(f"
|
|
1693
|
+
metrics_parts.append(f"$ {scenario.total_cost:.6f}")
|
|
1544
1694
|
if scenario.llm_calls > 0:
|
|
1545
|
-
metrics_parts.append(f"
|
|
1695
|
+
metrics_parts.append(f"{scenario.llm_calls} LLM calls")
|
|
1546
1696
|
if scenario.iterations > 0:
|
|
1547
|
-
metrics_parts.append(f"
|
|
1697
|
+
metrics_parts.append(f"{scenario.iterations} iterations")
|
|
1548
1698
|
if scenario.tools_used:
|
|
1549
|
-
metrics_parts.append(f"
|
|
1699
|
+
metrics_parts.append(f"{len(scenario.tools_used)} tools")
|
|
1550
1700
|
|
|
1551
1701
|
metrics_str = f" ({', '.join(metrics_parts)})" if metrics_parts else ""
|
|
1552
1702
|
console.print(
|
|
@@ -1573,14 +1723,14 @@ def _display_test_results(test_result):
|
|
|
1573
1723
|
console.print("\n[bold]Execution Metrics:[/bold]")
|
|
1574
1724
|
if test_result.total_cost > 0:
|
|
1575
1725
|
console.print(
|
|
1576
|
-
f"
|
|
1726
|
+
f" $ Cost: ${test_result.total_cost:.6f} ({test_result.total_tokens:,} tokens)"
|
|
1577
1727
|
)
|
|
1578
1728
|
if test_result.total_llm_calls > 0:
|
|
1579
|
-
console.print(f"
|
|
1729
|
+
console.print(f" LLM Calls: {test_result.total_llm_calls}")
|
|
1580
1730
|
if test_result.total_iterations > 0:
|
|
1581
|
-
console.print(f"
|
|
1731
|
+
console.print(f" Iterations: {test_result.total_iterations}")
|
|
1582
1732
|
if test_result.unique_tools_used:
|
|
1583
|
-
console.print(f"
|
|
1733
|
+
console.print(f" Tools: {', '.join(test_result.unique_tools_used)}")
|
|
1584
1734
|
|
|
1585
1735
|
|
|
1586
1736
|
def _display_evaluation_results(eval_results):
|
|
@@ -1609,7 +1759,7 @@ def _display_evaluation_results(eval_results):
|
|
|
1609
1759
|
|
|
1610
1760
|
# Flakiness warning
|
|
1611
1761
|
if eval_result.is_flaky:
|
|
1612
|
-
console.print(" [yellow]
|
|
1762
|
+
console.print(" [yellow]! FLAKY - Inconsistent results detected[/yellow]")
|
|
1613
1763
|
|
|
1614
1764
|
|
|
1615
1765
|
def _display_eval_results(report, runs: int, console):
|
|
@@ -1722,6 +1872,7 @@ def eval(
|
|
|
1722
1872
|
runs: int = typer.Option(1, help="Number of runs per case"),
|
|
1723
1873
|
parallel: bool = typer.Option(True, help="Run cases in parallel"),
|
|
1724
1874
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging"),
|
|
1875
|
+
debug: bool = typer.Option(False, "--debug", help="Enable debug logging + context tracing"),
|
|
1725
1876
|
):
|
|
1726
1877
|
"""
|
|
1727
1878
|
Run Pydantic Evals evaluation on procedure.
|
|
@@ -1741,7 +1892,7 @@ def eval(
|
|
|
1741
1892
|
# Run sequentially (for debugging)
|
|
1742
1893
|
tactus eval procedure.tac --no-parallel
|
|
1743
1894
|
"""
|
|
1744
|
-
setup_logging(verbose)
|
|
1895
|
+
setup_logging(verbose=verbose, debug=debug)
|
|
1745
1896
|
load_tactus_config()
|
|
1746
1897
|
|
|
1747
1898
|
if not procedure_file.exists():
|
|
@@ -2309,10 +2460,14 @@ def stdlib_test(
|
|
|
2309
2460
|
tactus stdlib test classify # Run only classify tests
|
|
2310
2461
|
tactus stdlib test extract # Run only extract tests
|
|
2311
2462
|
"""
|
|
2463
|
+
import os
|
|
2312
2464
|
import tactus
|
|
2313
2465
|
from tactus.validation import TactusValidator
|
|
2314
2466
|
from tactus.testing.test_runner import TactusTestRunner
|
|
2315
2467
|
|
|
2468
|
+
# Force deterministic mocks for stdlib tests (CI-safe, offline).
|
|
2469
|
+
os.environ["TACTUS_MOCK_MODE"] = "1"
|
|
2470
|
+
|
|
2316
2471
|
# Find stdlib spec files
|
|
2317
2472
|
package_root = Path(tactus.__file__).parent
|
|
2318
2473
|
stdlib_tac_path = package_root / "stdlib" / "tac" / "tactus"
|
|
@@ -2362,7 +2517,7 @@ def stdlib_test(
|
|
|
2362
2517
|
|
|
2363
2518
|
# Run tests
|
|
2364
2519
|
try:
|
|
2365
|
-
runner = TactusTestRunner(spec_file, mock_tools={}, params={})
|
|
2520
|
+
runner = TactusTestRunner(spec_file, mock_tools={}, params={}, mocked=True)
|
|
2366
2521
|
runner.setup(
|
|
2367
2522
|
result.registry.gherkin_specifications,
|
|
2368
2523
|
custom_steps_dict=result.registry.custom_steps,
|
|
tactus/core/compaction.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Re-export Biblicus Context Engine compaction utilities for Tactus."""
|
|
2
|
+
|
|
3
|
+
from biblicus.context_engine import (
|
|
4
|
+
BaseCompactor,
|
|
5
|
+
CompactionRequest,
|
|
6
|
+
SummaryCompactor,
|
|
7
|
+
TruncateCompactor,
|
|
8
|
+
)
|
|
9
|
+
from biblicus.context_engine.compaction import build_compactor
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"BaseCompactor",
|
|
13
|
+
"CompactionRequest",
|
|
14
|
+
"SummaryCompactor",
|
|
15
|
+
"TruncateCompactor",
|
|
16
|
+
"build_compactor",
|
|
17
|
+
]
|
|
tactus/core/context_assembler.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Tactus wrapper for the Biblicus Context Engine assembler."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from biblicus.context_engine import ContextAssembler as BiblicusContextAssembler
|
|
6
|
+
from biblicus.context_engine import ContextAssemblyResult
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ContextAssembler(BiblicusContextAssembler):
|
|
16
|
+
"""
|
|
17
|
+
Context assembler that defaults to Tactus test retrievers.
|
|
18
|
+
|
|
19
|
+
:param default_retriever: Optional default retriever override.
|
|
20
|
+
:type default_retriever: callable or None
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
context_registry,
|
|
26
|
+
retriever_registry: Optional[dict[str, Any]] = None,
|
|
27
|
+
corpus_registry: Optional[dict[str, Any]] = None,
|
|
28
|
+
compactor_registry: Optional[dict[str, Any]] = None,
|
|
29
|
+
default_retriever: Optional[Any] = None,
|
|
30
|
+
):
|
|
31
|
+
from tactus.core import retrieval as retrieval_module
|
|
32
|
+
|
|
33
|
+
retriever_router = retrieval_module.make_retriever_router(
|
|
34
|
+
corpus_registry, retriever_registry
|
|
35
|
+
)
|
|
36
|
+
super().__init__(
|
|
37
|
+
context_registry,
|
|
38
|
+
retriever_registry=retriever_registry,
|
|
39
|
+
corpus_registry=corpus_registry,
|
|
40
|
+
compactor_registry=compactor_registry,
|
|
41
|
+
default_retriever=default_retriever or retriever_router,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
def assemble(
|
|
45
|
+
self,
|
|
46
|
+
context_name: str,
|
|
47
|
+
base_system_prompt: str,
|
|
48
|
+
history_messages: list[dict[str, Any]],
|
|
49
|
+
user_message: Optional[str],
|
|
50
|
+
template_context: dict[str, Any],
|
|
51
|
+
retriever_override: Optional[Any] = None,
|
|
52
|
+
):
|
|
53
|
+
result = super().assemble(
|
|
54
|
+
context_name=context_name,
|
|
55
|
+
base_system_prompt=base_system_prompt,
|
|
56
|
+
history_messages=history_messages,
|
|
57
|
+
user_message=user_message,
|
|
58
|
+
template_context=template_context,
|
|
59
|
+
retriever_override=retriever_override,
|
|
60
|
+
)
|
|
61
|
+
if os.environ.get("TACTUS_TRACE_CONTEXT") == "1":
|
|
62
|
+
logger.debug(
|
|
63
|
+
"[CONTEXT] name=%s system_chars=%s history_items=%s user_chars=%s token_est=%s",
|
|
64
|
+
context_name,
|
|
65
|
+
len(result.system_prompt),
|
|
66
|
+
len(result.history),
|
|
67
|
+
len(result.user_message),
|
|
68
|
+
result.token_count,
|
|
69
|
+
)
|
|
70
|
+
return result
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
__all__ = ["ContextAssembler", "ContextAssemblyResult"]
|
|
tactus/core/context_models.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Re-export Biblicus Context Engine models for Tactus."""
|
|
2
|
+
|
|
3
|
+
from biblicus.context_engine import (
|
|
4
|
+
AssistantMessageSpec,
|
|
5
|
+
CompactorDeclaration,
|
|
6
|
+
ContextBudgetSpec,
|
|
7
|
+
ContextDeclaration,
|
|
8
|
+
ContextExpansionSpec,
|
|
9
|
+
ContextInsertSpec,
|
|
10
|
+
ContextMessageSpec,
|
|
11
|
+
ContextPackBudgetSpec,
|
|
12
|
+
ContextPackSpec,
|
|
13
|
+
ContextPolicySpec,
|
|
14
|
+
ContextRetrieverRequest,
|
|
15
|
+
ContextTemplateSpec,
|
|
16
|
+
CorpusDeclaration,
|
|
17
|
+
HistoryInsertSpec,
|
|
18
|
+
RetrieverDeclaration,
|
|
19
|
+
SystemMessageSpec,
|
|
20
|
+
UserMessageSpec,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"AssistantMessageSpec",
|
|
25
|
+
"CompactorDeclaration",
|
|
26
|
+
"ContextBudgetSpec",
|
|
27
|
+
"ContextDeclaration",
|
|
28
|
+
"ContextExpansionSpec",
|
|
29
|
+
"ContextInsertSpec",
|
|
30
|
+
"ContextMessageSpec",
|
|
31
|
+
"ContextPackBudgetSpec",
|
|
32
|
+
"ContextPackSpec",
|
|
33
|
+
"ContextPolicySpec",
|
|
34
|
+
"ContextRetrieverRequest",
|
|
35
|
+
"ContextTemplateSpec",
|
|
36
|
+
"CorpusDeclaration",
|
|
37
|
+
"HistoryInsertSpec",
|
|
38
|
+
"RetrieverDeclaration",
|
|
39
|
+
"SystemMessageSpec",
|
|
40
|
+
"UserMessageSpec",
|
|
41
|
+
]
|