PyPI - tactus - Versions diffs - 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl - Mend

tactus-0.37.0-py3-none-any.whl → tactus-0.38.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

tactus/__init__.py +1 -1
tactus/adapters/channels/base.py +2 -0
tactus/cli/app.py +212 -57
tactus/core/compaction.py +17 -0
tactus/core/context_assembler.py +73 -0
tactus/core/context_models.py +41 -0
tactus/core/dsl_stubs.py +557 -17
tactus/core/exceptions.py +8 -0
tactus/core/execution_context.py +1 -1
tactus/core/mocking.py +12 -0
tactus/core/registry.py +142 -0
tactus/core/retrieval.py +317 -0
tactus/core/retriever_tasks.py +30 -0
tactus/core/runtime.py +388 -74
tactus/dspy/agent.py +143 -82
tactus/dspy/config.py +16 -0
tactus/dspy/module.py +12 -1
tactus/ide/coding_assistant.py +2 -2
tactus/primitives/handles.py +79 -7
tactus/sandbox/config.py +1 -1
tactus/sandbox/container_runner.py +2 -0
tactus/sandbox/entrypoint.py +51 -8
tactus/sandbox/protocol.py +5 -0
tactus/stdlib/README.md +10 -1
tactus/stdlib/biblicus/__init__.py +3 -0
tactus/stdlib/biblicus/text.py +189 -0
tactus/stdlib/tac/biblicus/text.tac +32 -0
tactus/stdlib/tac/tactus/biblicus.spec.tac +179 -0
tactus/stdlib/tac/tactus/corpora/base.tac +42 -0
tactus/stdlib/tac/tactus/corpora/filesystem.tac +5 -0
tactus/stdlib/tac/tactus/retrievers/base.tac +37 -0
tactus/stdlib/tac/tactus/retrievers/embedding_index_file.tac +6 -0
tactus/stdlib/tac/tactus/retrievers/embedding_index_inmemory.tac +6 -0
tactus/stdlib/tac/tactus/retrievers/index.md +137 -0
tactus/stdlib/tac/tactus/retrievers/init.tac +11 -0
tactus/stdlib/tac/tactus/retrievers/sqlite_full_text_search.tac +6 -0
tactus/stdlib/tac/tactus/retrievers/tf_vector.tac +6 -0
tactus/testing/behave_integration.py +2 -0
tactus/testing/context.py +4 -0
tactus/validation/semantic_visitor.py +357 -6
tactus/validation/validator.py +142 -2
{tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/METADATA +3 -2
{tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/RECORD +46 -28
{tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/WHEEL +0 -0
{tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/entry_points.txt +0 -0
{tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/licenses/LICENSE +0 -0

tactus/core/exceptions.py CHANGED Viewed

@@ -11,6 +11,14 @@ class TactusRuntimeError(Exception):
     pass
+class TaskSelectionRequired(TactusRuntimeError):
+    """
+    Signal that the caller has to pick a task explicitly.
+    Raised when multiple tasks are available and no default can be chosen.
+    The candidate task names passed to the constructor are kept on ``tasks``.
+    """
+    def __init__(self, tasks: list[str]):
+        message = "Multiple tasks available; select one explicitly."
+        super().__init__(message)
+        # The same list object the caller passed in (not a copy).
+        self.tasks = tasks
 class ProcedureWaitingForHuman(Exception):
     """
     Raised to exit workflow when waiting for human response.

tactus/core/execution_context.py CHANGED Viewed

@@ -397,7 +397,7 @@ class BaseExecutionContext(ExecutionContext):
             except Exception as exception:
                 logger.warning("Failed to emit checkpoint event: %s", exception)
         else:
-            logger.warning("[CHECKPOINT] No log_handler available to emit checkpoint event")
+            logger.debug("[CHECKPOINT] No log_handler available to emit checkpoint event")
         # Persist metadata
         self.storage.save_procedure_metadata(self.procedure_id, self.metadata)

tactus/core/mocking.py CHANGED Viewed

@@ -13,6 +13,18 @@ import logging
 from typing import Any, Optional, Union
 logger = logging.getLogger(__name__)
+# Module-level slot for the active mock manager; None until one is installed.
+_CURRENT_MOCK_MANAGER: Optional["MockManager"] = None
+def set_current_mock_manager(manager: Optional["MockManager"]) -> None:
+    """
+    Set the globally accessible mock manager for stdlib helpers.
+    :param manager: Manager to install; passing ``None`` clears the slot.
+    """
+    global _CURRENT_MOCK_MANAGER
+    _CURRENT_MOCK_MANAGER = manager
+def get_current_mock_manager() -> Optional["MockManager"]:
+    """
+    Get the globally accessible mock manager for stdlib helpers.
+    :return: The value last stored by ``set_current_mock_manager``, or ``None``
+        if nothing has been stored.
+    """
+    return _CURRENT_MOCK_MANAGER
 @dataclass

tactus/core/registry.py CHANGED Viewed

@@ -10,6 +10,13 @@ from typing import Any, Dict, Optional, Union
 from pydantic import BaseModel, Field, ValidationError, ConfigDict
+from tactus.core.context_models import (
+    CompactorDeclaration,
+    ContextDeclaration,
+    CorpusDeclaration,
+    RetrieverDeclaration,
+)
 logger = logging.getLogger(__name__)
@@ -128,6 +135,18 @@ class AgentMockConfig(BaseModel):
     )
+class TaskDeclaration(BaseModel):
+    """
+    Task declaration from DSL.
+    Declarations form a tree: each one may hold child declarations keyed by
+    the child's name.
+    """
+    name: str
+    # Nested task declarations keyed by child name; each instance gets its own dict.
+    children: dict[str, "TaskDeclaration"] = Field(default_factory=dict)
+    # extra="allow": keys other than name/children are accepted and kept on the model.
+    model_config = ConfigDict(extra="allow")
+# Resolve the self-referencing "TaskDeclaration" forward reference used by `children`.
+TaskDeclaration.model_rebuild()
 class ProcedureRegistry(BaseModel):
     """Collects all declarations from a .tac file."""
@@ -149,6 +168,12 @@ class ProcedureRegistry(BaseModel):
     dependencies: dict[str, DependencyDeclaration] = Field(default_factory=dict)
     mocks: dict[str, dict[str, Any]] = Field(default_factory=dict)  # Mock configurations
     agent_mocks: dict[str, AgentMockConfig] = Field(default_factory=dict)  # Agent mock configs
+    contexts: dict[str, ContextDeclaration] = Field(default_factory=dict)
+    corpora: dict[str, CorpusDeclaration] = Field(default_factory=dict)
+    retrievers: dict[str, RetrieverDeclaration] = Field(default_factory=dict)
+    compactors: dict[str, CompactorDeclaration] = Field(default_factory=dict)
+    tasks: dict[str, TaskDeclaration] = Field(default_factory=dict)
+    include_tasks: list[dict[str, Any]] = Field(default_factory=list)
     # Message history configuration (aligned with pydantic-ai)
     message_history_config: dict[str, Any] = Field(default_factory=dict)
@@ -334,6 +359,123 @@ class RegistryBuilder:
         except Exception as exception:
             self._add_error(f"Invalid agent mock config for '{agent_name}': {exception}")
+    def register_context(self, name: str, config: dict) -> None:
+        """
+        Validate and store a context declaration under ``name``.
+        The declaration is built from a copy of ``config`` whose ``"name"`` entry
+        is forced to ``name``. A validation failure is recorded through
+        ``_add_error`` and nothing is stored.
+        """
+        payload = dict(config)
+        payload.update(name=name)
+        try:
+            declaration = ContextDeclaration(**payload)
+        except ValidationError as exception:
+            self._add_error(f"Invalid context '{name}': {exception}")
+        else:
+            self.registry.contexts[name] = declaration
+    def register_corpus(self, name: str, config: dict) -> None:
+        """
+        Validate and store a corpus declaration under ``name``.
+        Works on a copy of ``config``. A ``"root"`` key is renamed to
+        ``"corpus_root"`` unless ``"corpus_root"`` is already present, in which
+        case both keys are left as they are. A validation failure is recorded
+        through ``_add_error`` and nothing is stored.
+        """
+        settings = dict(config)
+        if "corpus_root" not in settings and "root" in settings:
+            settings["corpus_root"] = settings.pop("root")
+        try:
+            declaration = CorpusDeclaration(name=name, config=settings)
+        except ValidationError as exception:
+            self._add_error(f"Invalid corpus '{name}': {exception}")
+        else:
+            self.registry.corpora[name] = declaration
+    def register_retriever(self, name: str, config: dict) -> None:
+        """
+        Validate and store a retriever declaration under ``name``.
+        Works on a copy of ``config`` and normalises it in three steps:
+        1. ``retriever_id`` falls back to a non-``None`` ``retriever_type``.
+        2. Selected query options found under ``configuration.pipeline.query``
+           are lifted to the top level, without overriding keys already there.
+        3. ``corpus`` is removed from the config and passed separately.
+        A validation failure is recorded through ``_add_error``.
+        """
+        promoted_query_keys = (
+            "limit",
+            "offset",
+            "maximum_total_characters",
+            "maximum_items_per_source",
+            "max_items_per_source",
+            "include_metadata",
+            "metadata_fields",
+            "join_with",
+        )
+        settings = dict(config)
+        if "retriever_id" not in settings and settings.get("retriever_type") is not None:
+            settings["retriever_id"] = settings["retriever_type"]
+        configuration = settings.get("configuration")
+        pipeline = None
+        if isinstance(configuration, dict):
+            pipeline = configuration.get("pipeline", {}) or {}
+        query_options = pipeline.get("query") if isinstance(pipeline, dict) else None
+        if isinstance(query_options, dict):
+            for option in promoted_query_keys:
+                if option in query_options:
+                    # setdefault keeps any value already given at the top level.
+                    settings.setdefault(option, query_options[option])
+        corpus_name = settings.pop("corpus", None)
+        try:
+            declaration = RetrieverDeclaration(
+                name=name,
+                corpus=corpus_name,
+                config=settings,
+            )
+        except ValidationError as exception:
+            self._add_error(f"Invalid retriever '{name}': {exception}")
+        else:
+            self.registry.retrievers[name] = declaration
+    def register_task(
+        self,
+        name: str,
+        task_config: Optional[dict] = None,
+        parent: Optional[str] = None,
+    ) -> None:
+        """
+        Register a task declaration, at top level or as a child of ``parent``.
+        Every failure is reported through ``_add_error`` and leaves the registry
+        untouched: an empty name, a name containing ``":"``, a declaration that
+        fails validation, an unknown parent, or a name already taken among its
+        siblings.
+        """
+        if not name:
+            self._add_error("Task name is required.")
+            return
+        if ":" in name:
+            self._add_error(f"Task name '{name}' may not contain ':'")
+            return
+        payload = dict(task_config or {})
+        payload["name"] = name
+        try:
+            task = TaskDeclaration(**payload)
+        except ValidationError as exception:
+            self._add_error(f"Invalid task '{name}': {exception}")
+            return
+        # Pick the mapping the task goes into and the label used for duplicates.
+        if parent is None:
+            siblings = self.registry.tasks
+            label = name
+        else:
+            parent_task = self._find_task(parent)
+            if parent_task is None:
+                self._add_error(f"Parent task '{parent}' not found for '{name}'")
+                return
+            siblings = parent_task.children
+            label = f"{parent}:{name}"
+        if name in siblings:
+            self._add_error(f"Duplicate task '{label}'")
+            return
+        siblings[name] = task
+    def register_include_tasks(self, path: str, namespace: Optional[str] = None) -> None:
+        """
+        Record an IncludeTasks directive for static task discovery.
+        Appends ``{"path": path}`` to the registry's ``include_tasks`` list; the
+        ``"namespace"`` key is added only when ``namespace`` is truthy.
+        """
+        directive = {"path": path, "namespace": namespace} if namespace else {"path": path}
+        self.registry.include_tasks.append(directive)
+    def _find_task(self, name: str) -> Optional[TaskDeclaration]:
+        """
+        Look up a registered task by name or by colon-qualified path.
+        A plain name is resolved against the top-level tasks. A path such as
+        ``"build:docs"`` is walked segment by segment through ``children``, so a
+        nested task can serve as a parent. Task names themselves never contain
+        ``":"`` (``register_task`` rejects them), so the separator is unambiguous.
+        :param name: Task name or ``parent:child[:grandchild...]`` path.
+        :return: The matching declaration, or ``None`` when any segment is missing.
+        """
+        top_level = self.registry.tasks
+        if name in top_level:
+            return top_level[name]
+        if ":" not in name:
+            return None
+        found = None
+        level = top_level
+        for segment in name.split(":"):
+            found = level.get(segment)
+            if found is None:
+                return None
+            level = found.children
+        return found
+    def register_compactor(self, name: str, config: dict) -> None:
+        """
+        Validate and store a compactor declaration under ``name``.
+        The declaration receives a shallow copy of ``config``. A validation
+        failure is recorded through ``_add_error`` and nothing is stored.
+        """
+        try:
+            declaration = CompactorDeclaration(name=name, config=dict(config))
+        except ValidationError as exception:
+            self._add_error(f"Invalid compactor '{name}': {exception}")
+        else:
+            self.registry.compactors[name] = declaration
     def register_specification(self, name: str, scenarios: list) -> None:
         """Register a BDD specification."""
         try:

tactus/core/retrieval.py ADDED Viewed

@@ -0,0 +1,317 @@
+"""Deterministic retrieval utilities for Context packs."""
+from __future__ import annotations
+import hashlib
+import os
+import re
+import shutil
+import urllib.request
+from collections import Counter
+from pathlib import Path
+from typing import Callable, Iterable, List
+import pyarrow.parquet as pq
+from biblicus.context import ContextPack, ContextPackBlock
+from biblicus.context_engine import ContextRetrieverRequest, retrieve_context_pack
+from biblicus.corpus import Corpus
+# Per-split parquet file name and the SHA-256 hex digest the file on disk is
+# checked against (see ensure_wikitext2_raw).
+_WIKITEXT2_FILES = {
+    "train": {
+        "filename": "train-00000-of-00001.parquet",
+        "sha256": "e83889baabc497075506f91975be5fac0d45c5290b6b20582c8cd1e853d0c9f7",
+    },
+    "validation": {
+        "filename": "validation-00000-of-00001.parquet",
+        "sha256": "204929b7ff9d6184953f867dedb860e40aa69c078fc1e54b3baaa8fb28511c4c",
+    },
+    "test": {
+        "filename": "test-00000-of-00001.parquet",
+        "sha256": "5f1bea067869d04849c0f975a2b29c4ff47d867f484f5010ea5e861eab246d91",
+    },
+}
+def get_wikitext2_cache_dir() -> Path:
+    """
+    Locate the directory holding the Wikitext-2 raw parquet files.
+    A non-empty ``TACTUS_WIKITEXT2_CACHE_DIR`` environment variable wins;
+    otherwise the path is ``tests/fixtures/wikitext-2-raw-v1`` under the
+    directory two levels above this module's directory.
+    """
+    override = os.environ.get("TACTUS_WIKITEXT2_CACHE_DIR")
+    if not override:
+        project_root = Path(__file__).resolve().parents[2]
+        return project_root / "tests" / "fixtures" / "wikitext-2-raw-v1"
+    return Path(override)
+def ensure_wikitext2_raw(cache_dir: Path | None = None) -> Path:
+    """
+    Make sure every Wikitext-2 raw parquet file is present and intact.
+    Each file listed in ``_WIKITEXT2_FILES`` is downloaded unless a copy with
+    the expected SHA-256 already exists in the cache directory.
+    :param cache_dir: Directory to use; defaults to ``get_wikitext2_cache_dir()``.
+    :return: The cache directory (created if needed).
+    :raises RuntimeError: If a freshly downloaded file fails its checksum.
+    """
+    directory = cache_dir or get_wikitext2_cache_dir()
+    directory.mkdir(parents=True, exist_ok=True)
+    base_url = "https://huggingface.co/datasets/Salesforce/wikitext/resolve/main/wikitext-2-raw-v1"
+    for split, meta in _WIKITEXT2_FILES.items():
+        filename = meta["filename"]
+        expected_digest = meta["sha256"]
+        target = directory / filename
+        already_cached = target.exists() and _sha256_matches(target, expected_digest)
+        if not already_cached:
+            _download_file(f"{base_url}/{filename}", target)
+            if not _sha256_matches(target, expected_digest):
+                raise RuntimeError(f"Checksum mismatch for {split} parquet file")
+    return directory
+def load_wikitext2_texts(split: str, limit: int | None = None) -> List[str]:
+    """
+    Read the ``text`` column of one Wikitext-2 raw split.
+    Falsy entries (empty strings, nulls) are dropped before ``limit`` is applied.
+    :param split: One of the keys of ``_WIKITEXT2_FILES``.
+    :param limit: When not ``None``, the result is sliced to ``texts[:limit]``.
+    :raises ValueError: If ``split`` is not a known split.
+    """
+    if split not in _WIKITEXT2_FILES:
+        raise ValueError(f"Unknown Wikitext2 split: {split}")
+    parquet_path = ensure_wikitext2_raw() / _WIKITEXT2_FILES[split]["filename"]
+    column = pq.read_table(parquet_path, columns=["text"]).column("text")
+    texts = list(filter(None, column.to_pylist()))
+    return texts if limit is None else texts[:limit]
+def retrieve_wikitext2(request: ContextRetrieverRequest) -> ContextPack:
+    """
+    Retrieve matching passages from Wikitext-2 raw.
+    The candidate pool is optionally capped through request metadata
+    (``maximum_cache_total_items`` takes precedence over
+    ``maximum_cache_total_characters``), ranked by keyword overlap with the
+    query, paged with ``offset``/``limit`` and trimmed so the summed block
+    lengths stay within ``maximum_total_characters``.
+    :param request: Context retriever request payload.
+    :type request: ContextRetrieverRequest
+    :return: Context pack derived from matching passages.
+    :rtype: ContextPack
+    """
+    # Tolerate a missing metadata mapping instead of failing on ``None.get``.
+    metadata = request.metadata or {}
+    split = metadata.get("split", "train")
+    maximum_cache_total_items = metadata.get("maximum_cache_total_items")
+    maximum_cache_total_characters = metadata.get("maximum_cache_total_characters")
+    texts = load_wikitext2_texts(split=split, limit=None)
+    if maximum_cache_total_items is not None:
+        texts = texts[: int(maximum_cache_total_items)]
+    elif maximum_cache_total_characters is not None:
+        character_cap = int(maximum_cache_total_characters)
+        selected = []
+        total_chars = 0
+        for text in texts:
+            # Stop at the first text that would overflow the cap; texts after
+            # it are not considered even if they would fit.
+            if total_chars + len(text) > character_cap:
+                break
+            selected.append(text)
+            total_chars += len(text)
+        texts = selected
+    ranked = _rank_texts(request.query, texts)
+    offset = request.offset
+    limit = request.limit
+    blocks: List[ContextPackBlock] = []
+    remaining_chars = request.maximum_total_characters
+    for idx, text in enumerate(ranked[offset : offset + limit], start=1):
+        if remaining_chars is not None and remaining_chars <= 0:
+            break
+        snippet = text.strip()
+        if not snippet:
+            continue
+        if remaining_chars is not None:
+            if len(snippet) > remaining_chars:
+                if remaining_chars >= 3:
+                    snippet = snippet[: remaining_chars - 3].rstrip() + "..."
+                else:
+                    # No room for an ellipsis. A negative slice bound here would
+                    # keep almost the whole text and blow the budget, so hard-cut.
+                    snippet = snippet[:remaining_chars]
+            remaining_chars -= len(snippet)
+        blocks.append(
+            ContextPackBlock(
+                # 1-based position of the passage within the ranked list.
+                evidence_item_id=f"{split}-{offset + idx}",
+                text=snippet,
+                metadata=None,
+            )
+        )
+    text = "\n\n".join(block.text for block in blocks)
+    return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
+def get_noaa_afd_cache_dir() -> Path:
+    """
+    Locate the directory holding the NOAA AFD text fixtures.
+    A non-empty ``TACTUS_NOAA_AFD_DIR`` environment variable wins; otherwise the
+    path is ``tests/fixtures/noaa_afd`` under the directory two levels above
+    this module's directory.
+    """
+    override = os.environ.get("TACTUS_NOAA_AFD_DIR")
+    if not override:
+        project_root = Path(__file__).resolve().parents[2]
+        return project_root / "tests" / "fixtures" / "noaa_afd"
+    return Path(override)
+def load_noaa_afd_texts(wfo: str, limit: int | None = None) -> List[str]:
+    """
+    Load NOAA AFD text files for the given WFO code.
+    :param wfo: WFO code; upper-cased to form the sub-directory name.
+    :param limit: When not ``None``, only ``files[:limit]`` (sorted path order) are read.
+    :return: File contents in sorted path order, decoded as UTF-8 with
+        undecodable bytes replaced.
+    :raises FileNotFoundError: If the WFO directory does not exist.
+    """
+    base_dir = get_noaa_afd_cache_dir() / wfo.upper()
+    if not base_dir.exists():
+        raise FileNotFoundError(f"No NOAA AFD corpus found for WFO '{wfo}' at {base_dir}")
+    files = sorted(base_dir.glob("*.txt"))
+    if limit is not None:
+        # Slice before reading so files beyond the limit are never opened.
+        files = files[:limit]
+    return [path.read_text(encoding="utf-8", errors="replace") for path in files]
+def retrieve_noaa_afd(request: ContextRetrieverRequest) -> ContextPack:
+    """
+    Retrieve matching passages from NOAA AFD text fixtures.
+    The candidate pool is optionally capped through request metadata
+    (``maximum_cache_total_items`` takes precedence over
+    ``maximum_cache_total_characters``), ranked by keyword overlap with the
+    query, paged with ``offset``/``limit`` and trimmed so the summed block
+    lengths stay within ``maximum_total_characters``.
+    :param request: Context retriever request payload.
+    :type request: ContextRetrieverRequest
+    :return: Context pack derived from matching passages.
+    :rtype: ContextPack
+    """
+    # Tolerate a missing metadata mapping instead of failing on ``None.get``.
+    metadata = request.metadata or {}
+    wfo = metadata.get("wfo", "MFL")
+    maximum_cache_total_items = metadata.get("maximum_cache_total_items")
+    maximum_cache_total_characters = metadata.get("maximum_cache_total_characters")
+    texts = load_noaa_afd_texts(wfo=wfo, limit=None)
+    if maximum_cache_total_items is not None:
+        texts = texts[: int(maximum_cache_total_items)]
+    elif maximum_cache_total_characters is not None:
+        character_cap = int(maximum_cache_total_characters)
+        selected = []
+        total_chars = 0
+        for text in texts:
+            # Stop at the first text that would overflow the cap; texts after
+            # it are not considered even if they would fit.
+            if total_chars + len(text) > character_cap:
+                break
+            selected.append(text)
+            total_chars += len(text)
+        texts = selected
+    ranked = _rank_texts(request.query, texts)
+    offset = request.offset
+    limit = request.limit
+    blocks: List[ContextPackBlock] = []
+    remaining_chars = request.maximum_total_characters
+    for idx, text in enumerate(ranked[offset : offset + limit], start=1):
+        if remaining_chars is not None and remaining_chars <= 0:
+            break
+        snippet = text.strip()
+        if not snippet:
+            continue
+        if remaining_chars is not None:
+            if len(snippet) > remaining_chars:
+                if remaining_chars >= 3:
+                    snippet = snippet[: remaining_chars - 3].rstrip() + "..."
+                else:
+                    # No room for an ellipsis. A negative slice bound here would
+                    # keep almost the whole text and blow the budget, so hard-cut.
+                    snippet = snippet[:remaining_chars]
+            remaining_chars -= len(snippet)
+        blocks.append(
+            ContextPackBlock(
+                # 1-based position of the passage within the ranked list.
+                evidence_item_id=f"{wfo.lower()}-{offset + idx}",
+                text=snippet,
+                metadata=None,
+            )
+        )
+    text = "\n\n".join(block.text for block in blocks)
+    return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
+def retrieve_biblicus_context_pack(request: ContextRetrieverRequest) -> ContextPack:
+    """
+    Build a context pack by delegating to a Biblicus retriever.
+    All settings come from ``request.metadata``. ``retriever_id`` falls back to
+    ``retriever_type`` and ``corpus_root`` falls back to ``root``; both must
+    resolve to a truthy value.
+    :param request: Context retriever request payload.
+    :type request: ContextRetrieverRequest
+    :return: Context pack derived from Biblicus retrieval.
+    :rtype: ContextPack
+    :raises ValueError: If required metadata is missing.
+    """
+    options = request.metadata or {}
+    retriever_id = options.get("retriever_id") or options.get("retriever_type")
+    corpus_root = options.get("corpus_root") or options.get("root")
+    if not retriever_id:
+        raise ValueError("Biblicus retrieval requires 'retriever_id' in metadata")
+    if not corpus_root:
+        raise ValueError("Biblicus retrieval requires 'corpus_root' in metadata")
+    # The short key is consulted only when the long key is absent altogether;
+    # an explicit None under the long key is passed through as None.
+    if "maximum_items_per_source" in options:
+        per_source_cap = options["maximum_items_per_source"]
+    else:
+        per_source_cap = options.get("max_items_per_source")
+    return retrieve_context_pack(
+        request=request,
+        corpus=Corpus.open(corpus_root),
+        retriever_id=retriever_id,
+        snapshot_id=options.get("snapshot_id"),
+        configuration_name=options.get("configuration_name"),
+        configuration=options.get("configuration") or {},
+        max_items_per_source=per_source_cap,
+        include_metadata=bool(options.get("include_metadata", False)),
+        metadata_fields=options.get("metadata_fields"),
+    )
+def make_retriever_router(
+    corpus_registry, retriever_registry=None
+) -> Callable[[ContextRetrieverRequest], ContextPack]:
+    """
+    Build a retriever dispatcher based on corpus and retriever configuration.
+    The returned callable resolves a retriever id from the request metadata
+    (``retriever_id``, then ``retriever_type``), falling back to the config of
+    the retriever named by ``metadata["retriever"]`` in ``retriever_registry``.
+    ``"noaa_afd"`` and ``"wikitext2"`` are served by the built-in fixture
+    retrievers; any other id is handed to Biblicus.
+    :param corpus_registry: Corpus registry; accepted for interface
+        compatibility but not consulted by the router.
+    :type corpus_registry: dict[str, Any] or None
+    :param retriever_registry: Retriever registry used to resolve retrievers.
+    :type retriever_registry: dict[str, Any] or None
+    :return: Retriever callable that dispatches by retriever id; it raises
+        ``ValueError`` when no retriever id can be resolved.
+    :rtype: callable
+    """
+    def _route(request: ContextRetrieverRequest) -> ContextPack:
+        # Tolerate a missing metadata mapping instead of failing on ``None.get``.
+        metadata = request.metadata or {}
+        corpus_name = metadata.get("corpus")
+        retriever_name = metadata.get("retriever")
+        retriever_id = metadata.get("retriever_id") or metadata.get("retriever_type")
+        if retriever_id is None and retriever_registry and retriever_name in retriever_registry:
+            retriever_config = getattr(retriever_registry[retriever_name], "config", {})
+            if isinstance(retriever_config, dict):
+                retriever_id = retriever_config.get("retriever_id") or retriever_config.get(
+                    "retriever_type"
+                )
+        if retriever_id == "noaa_afd":
+            return retrieve_noaa_afd(request)
+        if retriever_id == "wikitext2":
+            return retrieve_wikitext2(request)
+        if retriever_id is None:
+            missing_target = retriever_name or corpus_name or "<unknown>"
+            raise ValueError(f"Missing retriever_id for retriever '{missing_target}'")
+        # NOTE(review): an id resolved from the registry is not forwarded;
+        # retrieve_biblicus_context_pack re-reads it from request.metadata.
+        # Confirm callers always copy the id into the metadata.
+        return retrieve_biblicus_context_pack(request)
+    return _route
+def _rank_texts(query: str, texts: Iterable[str]) -> List[str]:
+    """
+    Rank texts by keyword overlap.
+    A text's score is the number of times the query's tokens occur in it; a
+    token repeated in the query is counted once per repetition.
+    :param query: Free-text query; tokenized with ``_tokenize``.
+    :param texts: Candidate texts; any iterable, including one-shot iterators.
+    :return: Texts with a positive score, highest first (ties keep input
+        order). If the query has no tokens or nothing matches, all texts are
+        returned in input order.
+    """
+    # Materialise once: ``texts`` may be a one-shot iterator and is needed
+    # again for the fallback at the end.
+    candidates = list(texts)
+    query_terms = _tokenize(query)
+    if not query_terms:
+        return candidates
+    scored = []
+    for text in candidates:
+        # One counting pass per text instead of a list.count scan per query term.
+        term_counts = Counter(_tokenize(text))
+        score = sum(term_counts[term] for term in query_terms)
+        if score > 0:
+            scored.append((score, text))
+    # list.sort is stable (also with reverse=True), so ties keep input order.
+    scored.sort(key=lambda item: item[0], reverse=True)
+    return [text for _, text in scored] or candidates
+def _tokenize(text: str) -> List[str]:
+    """Tokenize text to lowercase word tokens (runs of ASCII letters and digits)."""
+    return re.findall(r"[a-zA-Z0-9]+", text.lower())
+def _download_file(url: str, target: Path) -> None:
+    """
+    Download a file to the target path.
+    The response is streamed in chunks to a sibling ``<name>.part`` file, which
+    is moved over ``target`` only after the transfer completes. A failed
+    download therefore never leaves a truncated file at ``target``.
+    :param url: URL to fetch with ``urllib.request.urlopen``.
+    :param target: Destination path; its parent directory must already exist.
+    """
+    partial = target.with_name(target.name + ".part")
+    try:
+        with urllib.request.urlopen(url) as response, partial.open("wb") as handle:
+            # Stream instead of buffering the whole body in memory.
+            shutil.copyfileobj(response, handle)
+        os.replace(partial, target)
+    finally:
+        # No-op after a successful replace; removes the leftover on failure.
+        partial.unlink(missing_ok=True)
+def _sha256_matches(path: Path, expected: str) -> bool:
+    """
+    Tell whether the file at ``path`` has the SHA-256 hex digest ``expected``.
+    A missing file counts as a mismatch. The file is hashed in 8 KiB chunks.
+    """
+    if path.exists():
+        digest = hashlib.sha256()
+        with path.open("rb") as stream:
+            while True:
+                block = stream.read(8192)
+                if block == b"":
+                    break
+                digest.update(block)
+        return digest.hexdigest() == expected
+    return False

tactus/core/retriever_tasks.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""Static metadata for retriever-supported tasks."""
+from __future__ import annotations
+from typing import Optional
+# Retriever identifier -> names of the tasks that retriever supports.
+# Identifiers missing from this table are treated as supporting no tasks
+# (see supported_retriever_tasks).
+RETRIEVER_TASKS: dict[str, set[str]] = {
+    "tf-vector": {"index"},
+    "sqlite-full-text-search": {"index"},
+    "embedding-index-inmemory": {"index"},
+    "embedding-index-file": {"index"},
+}
+def resolve_retriever_id(config: Optional[dict]) -> Optional[str]:
+    """
+    Pick the retriever identifier out of a retriever config dict.
+    ``retriever_id`` is preferred over ``retriever_type``; a value only counts
+    when it is a string containing non-whitespace characters, and it is
+    returned unstripped. Returns ``None`` for a non-dict ``config`` or when
+    neither key qualifies.
+    """
+    if not isinstance(config, dict):
+        return None
+    candidates = (config.get(key) for key in ("retriever_id", "retriever_type"))
+    usable = (value for value in candidates if isinstance(value, str) and value.strip())
+    return next(usable, None)
+def supported_retriever_tasks(retriever_id: Optional[str]) -> set[str]:
+    """
+    Look up the task names a retriever supports.
+    Returns a fresh set on every call, so callers may mutate it freely; the set
+    is empty for a falsy or unknown ``retriever_id``.
+    """
+    known_tasks = RETRIEVER_TASKS.get(retriever_id) if retriever_id else None
+    return set(known_tasks) if known_tasks else set()

tactus 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

tactus-0.37.0-py3-none-any.whl → tactus-0.38.0-py3-none-any.whl