tactus 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. tactus/__init__.py +1 -1
  2. tactus/adapters/channels/base.py +2 -0
  3. tactus/cli/app.py +212 -57
  4. tactus/core/compaction.py +17 -0
  5. tactus/core/context_assembler.py +73 -0
  6. tactus/core/context_models.py +41 -0
  7. tactus/core/dsl_stubs.py +557 -17
  8. tactus/core/exceptions.py +8 -0
  9. tactus/core/execution_context.py +1 -1
  10. tactus/core/mocking.py +12 -0
  11. tactus/core/registry.py +142 -0
  12. tactus/core/retrieval.py +317 -0
  13. tactus/core/retriever_tasks.py +30 -0
  14. tactus/core/runtime.py +388 -74
  15. tactus/dspy/agent.py +143 -82
  16. tactus/dspy/config.py +16 -0
  17. tactus/dspy/module.py +12 -1
  18. tactus/ide/coding_assistant.py +2 -2
  19. tactus/primitives/handles.py +79 -7
  20. tactus/sandbox/config.py +1 -1
  21. tactus/sandbox/container_runner.py +2 -0
  22. tactus/sandbox/entrypoint.py +51 -8
  23. tactus/sandbox/protocol.py +5 -0
  24. tactus/stdlib/README.md +10 -1
  25. tactus/stdlib/biblicus/__init__.py +3 -0
  26. tactus/stdlib/biblicus/text.py +189 -0
  27. tactus/stdlib/tac/biblicus/text.tac +32 -0
  28. tactus/stdlib/tac/tactus/biblicus.spec.tac +179 -0
  29. tactus/stdlib/tac/tactus/corpora/base.tac +42 -0
  30. tactus/stdlib/tac/tactus/corpora/filesystem.tac +5 -0
  31. tactus/stdlib/tac/tactus/retrievers/base.tac +37 -0
  32. tactus/stdlib/tac/tactus/retrievers/embedding_index_file.tac +6 -0
  33. tactus/stdlib/tac/tactus/retrievers/embedding_index_inmemory.tac +6 -0
  34. tactus/stdlib/tac/tactus/retrievers/index.md +137 -0
  35. tactus/stdlib/tac/tactus/retrievers/init.tac +11 -0
  36. tactus/stdlib/tac/tactus/retrievers/sqlite_full_text_search.tac +6 -0
  37. tactus/stdlib/tac/tactus/retrievers/tf_vector.tac +6 -0
  38. tactus/testing/behave_integration.py +2 -0
  39. tactus/testing/context.py +4 -0
  40. tactus/validation/semantic_visitor.py +357 -6
  41. tactus/validation/validator.py +142 -2
  42. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/METADATA +3 -2
  43. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/RECORD +46 -28
  44. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/WHEEL +0 -0
  45. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/entry_points.txt +0 -0
  46. {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/licenses/LICENSE +0 -0
tactus/core/exceptions.py CHANGED
@@ -11,6 +11,14 @@ class TactusRuntimeError(Exception):
11
11
  pass
12
12
 
13
13
 
14
class TaskSelectionRequired(TactusRuntimeError):
    """Raised when several tasks are available and no default can be chosen."""

    def __init__(self, tasks: list[str]):
        # Keep the candidate task names on the exception so callers can
        # present them to the user when prompting for an explicit choice.
        self.tasks = tasks
        super().__init__("Multiple tasks available; select one explicitly.")
14
22
  class ProcedureWaitingForHuman(Exception):
15
23
  """
16
24
  Raised to exit workflow when waiting for human response.
@@ -397,7 +397,7 @@ class BaseExecutionContext(ExecutionContext):
397
397
  except Exception as exception:
398
398
  logger.warning("Failed to emit checkpoint event: %s", exception)
399
399
  else:
400
- logger.warning("[CHECKPOINT] No log_handler available to emit checkpoint event")
400
+ logger.debug("[CHECKPOINT] No log_handler available to emit checkpoint event")
401
401
 
402
402
  # Persist metadata
403
403
  self.storage.save_procedure_metadata(self.procedure_id, self.metadata)
tactus/core/mocking.py CHANGED
@@ -13,6 +13,18 @@ import logging
13
13
  from typing import Any, Optional, Union
14
14
 
15
15
  logger = logging.getLogger(__name__)
16
# Process-wide mock manager slot; stdlib helpers look it up at call time.
_CURRENT_MOCK_MANAGER: Optional["MockManager"] = None


def set_current_mock_manager(manager: Optional["MockManager"]) -> None:
    """Install *manager* as the globally accessible mock manager (None clears it)."""
    global _CURRENT_MOCK_MANAGER
    _CURRENT_MOCK_MANAGER = manager


def get_current_mock_manager() -> Optional["MockManager"]:
    """Return the globally installed mock manager, or None when unset."""
    return _CURRENT_MOCK_MANAGER
16
28
 
17
29
 
18
30
  @dataclass
tactus/core/registry.py CHANGED
@@ -10,6 +10,13 @@ from typing import Any, Dict, Optional, Union
10
10
 
11
11
  from pydantic import BaseModel, Field, ValidationError, ConfigDict
12
12
 
13
+ from tactus.core.context_models import (
14
+ CompactorDeclaration,
15
+ ContextDeclaration,
16
+ CorpusDeclaration,
17
+ RetrieverDeclaration,
18
+ )
19
+
13
20
  logger = logging.getLogger(__name__)
14
21
 
15
22
 
@@ -128,6 +135,18 @@ class AgentMockConfig(BaseModel):
128
135
  )
129
136
 
130
137
 
138
class TaskDeclaration(BaseModel):
    """Task declaration from DSL."""

    # Task identifier as written in the .tac file.
    name: str
    # Nested sub-tasks keyed by child name (self-referential forward reference).
    children: dict[str, "TaskDeclaration"] = Field(default_factory=dict)

    # Preserve unrecognized DSL keys instead of rejecting the declaration.
    model_config = ConfigDict(extra="allow")


# Resolve the "TaskDeclaration" forward reference used by `children`.
TaskDeclaration.model_rebuild()
148
+
149
+
131
150
  class ProcedureRegistry(BaseModel):
132
151
  """Collects all declarations from a .tac file."""
133
152
 
@@ -149,6 +168,12 @@ class ProcedureRegistry(BaseModel):
149
168
  dependencies: dict[str, DependencyDeclaration] = Field(default_factory=dict)
150
169
  mocks: dict[str, dict[str, Any]] = Field(default_factory=dict) # Mock configurations
151
170
  agent_mocks: dict[str, AgentMockConfig] = Field(default_factory=dict) # Agent mock configs
171
+ contexts: dict[str, ContextDeclaration] = Field(default_factory=dict)
172
+ corpora: dict[str, CorpusDeclaration] = Field(default_factory=dict)
173
+ retrievers: dict[str, RetrieverDeclaration] = Field(default_factory=dict)
174
+ compactors: dict[str, CompactorDeclaration] = Field(default_factory=dict)
175
+ tasks: dict[str, TaskDeclaration] = Field(default_factory=dict)
176
+ include_tasks: list[dict[str, Any]] = Field(default_factory=list)
152
177
 
153
178
  # Message history configuration (aligned with pydantic-ai)
154
179
  message_history_config: dict[str, Any] = Field(default_factory=dict)
@@ -334,6 +359,123 @@ class RegistryBuilder:
334
359
  except Exception as exception:
335
360
  self._add_error(f"Invalid agent mock config for '{agent_name}': {exception}")
336
361
 
362
def register_context(self, name: str, config: dict) -> None:
    """Register a context declaration under *name*, reporting validation errors."""
    payload = {**config, "name": name}
    try:
        declaration = ContextDeclaration(**payload)
    except ValidationError as exception:
        self._add_error(f"Invalid context '{name}': {exception}")
    else:
        self.registry.contexts[name] = declaration
370
+
371
def register_corpus(self, name: str, config: dict) -> None:
    """Register a corpus declaration, normalizing the 'root' alias for 'corpus_root'."""
    normalized = dict(config)
    # "root" is accepted as shorthand; an explicit "corpus_root" wins.
    if "corpus_root" not in normalized and "root" in normalized:
        normalized["corpus_root"] = normalized.pop("root")
    try:
        declaration = CorpusDeclaration(name=name, config=normalized)
    except ValidationError as exception:
        self._add_error(f"Invalid corpus '{name}': {exception}")
    else:
        self.registry.corpora[name] = declaration
380
+
381
def register_retriever(self, name: str, config: dict) -> None:
    """Register a retriever declaration."""
    retriever_config = dict(config)
    # Fall back to "retriever_type" when no explicit "retriever_id" is given.
    if "retriever_id" not in retriever_config:
        candidate = retriever_config.get("retriever_type")
        if candidate is not None:
            retriever_config["retriever_id"] = candidate
    # Promote query-pipeline settings (configuration.pipeline.query.*) to the
    # top level so the declaration model sees them; explicit top-level keys
    # take precedence over pipeline values.
    if isinstance(retriever_config.get("configuration"), dict):
        pipeline = retriever_config["configuration"].get("pipeline", {}) or {}
        if isinstance(pipeline, dict) and isinstance(pipeline.get("query"), dict):
            query_config = pipeline.get("query") or {}
            for key in (
                "limit",
                "offset",
                "maximum_total_characters",
                "maximum_items_per_source",
                "max_items_per_source",
                "include_metadata",
                "metadata_fields",
                "join_with",
            ):
                if key in query_config and key not in retriever_config:
                    retriever_config[key] = query_config.get(key)
    # The corpus reference lives on the declaration itself, not in its config.
    corpus_name = retriever_config.pop("corpus", None)
    try:
        self.registry.retrievers[name] = RetrieverDeclaration(
            name=name,
            corpus=corpus_name,
            config=retriever_config,
        )
    except ValidationError as exception:
        self._add_error(f"Invalid retriever '{name}': {exception}")
413
+
414
def register_task(
    self,
    name: str,
    task_config: Optional[dict] = None,
    parent: Optional[str] = None,
) -> None:
    """Register a task declaration (optionally nested under a parent task)."""
    # Guard clauses: a task needs a name, and ':' is reserved as the
    # parent/child separator in qualified task names.
    if not name:
        self._add_error("Task name is required.")
        return
    if ":" in name:
        self._add_error(f"Task name '{name}' may not contain ':'")
        return

    payload = {**(task_config or {}), "name": name}
    try:
        task = TaskDeclaration(**payload)
    except ValidationError as exception:
        self._add_error(f"Invalid task '{name}': {exception}")
        return

    if parent is None:
        # Top-level task: reject duplicates at the registry root.
        if name in self.registry.tasks:
            self._add_error(f"Duplicate task '{name}'")
        else:
            self.registry.tasks[name] = task
        return

    parent_task = self._find_task(parent)
    if parent_task is None:
        self._add_error(f"Parent task '{parent}' not found for '{name}'")
        return
    if name in parent_task.children:
        self._add_error(f"Duplicate task '{parent}:{name}'")
        return
    parent_task.children[name] = task
455
+
456
def register_include_tasks(self, path: str, namespace: Optional[str] = None) -> None:
    """Record an IncludeTasks directive for static task discovery."""
    entry = {"path": path}
    # Empty/None namespaces are omitted rather than stored as blanks.
    if namespace:
        entry["namespace"] = namespace
    self.registry.include_tasks.append(entry)
462
+
463
def _find_task(self, name: str) -> Optional["TaskDeclaration"]:
    """Look up a top-level registered task by name; None when absent."""
    return self.registry.tasks.get(name)
467
+
468
def register_compactor(self, name: str, config: dict) -> None:
    """Register a compactor declaration, collecting validation failures as errors."""
    try:
        declaration = CompactorDeclaration(name=name, config=dict(config))
    except ValidationError as exception:
        self._add_error(f"Invalid compactor '{name}': {exception}")
    else:
        self.registry.compactors[name] = declaration
478
+
337
479
  def register_specification(self, name: str, scenarios: list) -> None:
338
480
  """Register a BDD specification."""
339
481
  try:
@@ -0,0 +1,317 @@
1
+ """Deterministic retrieval utilities for Context packs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import os
7
+ import re
8
+ import urllib.request
9
+ from pathlib import Path
10
+ from typing import Iterable, List
11
+
12
+ import pyarrow.parquet as pq
13
+
14
+ from biblicus.context import ContextPack, ContextPackBlock
15
+ from biblicus.context_engine import ContextRetrieverRequest, retrieve_context_pack
16
+ from biblicus.corpus import Corpus
17
+
18
+
19
# Parquet shards of the Wikitext-2 raw dataset (one per split), with pinned
# SHA-256 checksums used to verify each download before it is trusted.
_WIKITEXT2_FILES = {
    "train": {
        "filename": "train-00000-of-00001.parquet",
        "sha256": "e83889baabc497075506f91975be5fac0d45c5290b6b20582c8cd1e853d0c9f7",
    },
    "validation": {
        "filename": "validation-00000-of-00001.parquet",
        "sha256": "204929b7ff9d6184953f867dedb860e40aa69c078fc1e54b3baaa8fb28511c4c",
    },
    "test": {
        "filename": "test-00000-of-00001.parquet",
        "sha256": "5f1bea067869d04849c0f975a2b29c4ff47d867f484f5010ea5e861eab246d91",
    },
}
33
+
34
+
35
def get_wikitext2_cache_dir() -> Path:
    """Return the cache directory for Wikitext-2 raw parquet files."""
    # An environment override takes precedence over the repo-relative default.
    override = os.environ.get("TACTUS_WIKITEXT2_CACHE_DIR")
    if override:
        return Path(override)
    return Path(__file__).resolve().parents[2] / "tests" / "fixtures" / "wikitext-2-raw-v1"
41
+
42
+
43
def ensure_wikitext2_raw(cache_dir: Path | None = None) -> Path:
    """Ensure the Wikitext-2 raw parquet files are present and checksum-valid.

    Missing or corrupt shards are (re)downloaded; a post-download checksum
    mismatch raises RuntimeError.
    """
    target_dir = cache_dir or get_wikitext2_cache_dir()
    target_dir.mkdir(parents=True, exist_ok=True)
    base_url = "https://huggingface.co/datasets/Salesforce/wikitext/resolve/main/wikitext-2-raw-v1"

    for split, meta in _WIKITEXT2_FILES.items():
        shard = target_dir / meta["filename"]
        # Skip shards that are already present with the expected checksum.
        if shard.exists() and _sha256_matches(shard, meta["sha256"]):
            continue
        _download_file(f"{base_url}/{meta['filename']}", shard)
        if not _sha256_matches(shard, meta["sha256"]):
            raise RuntimeError(f"Checksum mismatch for {split} parquet file")
    return target_dir
60
+
61
+
62
def load_wikitext2_texts(split: str, limit: int | None = None) -> List[str]:
    """Load non-empty Wikitext-2 raw texts for the given split, optionally capped."""
    if split not in _WIKITEXT2_FILES:
        raise ValueError(f"Unknown Wikitext2 split: {split}")
    parquet_path = ensure_wikitext2_raw() / _WIKITEXT2_FILES[split]["filename"]
    # Only the "text" column is needed; empty rows are discarded.
    column = pq.read_table(parquet_path, columns=["text"]).column("text")
    texts = [entry for entry in column.to_pylist() if entry]
    return texts if limit is None else texts[:limit]
73
+
74
+
75
def retrieve_wikitext2(request: ContextRetrieverRequest) -> ContextPack:
    """
    Retrieve matching passages from Wikitext-2 raw.

    :param request: Context retriever request payload.
    :type request: ContextRetrieverRequest
    :return: Context pack derived from matching passages.
    :rtype: ContextPack
    """
    split = request.metadata.get("split", "train")
    # Optional caps on how much of the corpus is considered before ranking.
    maximum_cache_total_items = request.metadata.get("maximum_cache_total_items")
    maximum_cache_total_characters = request.metadata.get("maximum_cache_total_characters")
    texts = load_wikitext2_texts(split=split, limit=None)
    if maximum_cache_total_items is not None:
        texts = texts[: int(maximum_cache_total_items)]
    elif maximum_cache_total_characters is not None:
        # The item cap wins when both are given; otherwise keep whole texts
        # until the character budget would be exceeded (no mid-text truncation
        # at this stage).
        selected = []
        total_chars = 0
        for text in texts:
            text_length = len(text)
            if total_chars + text_length > int(maximum_cache_total_characters):
                break
            selected.append(text)
            total_chars += text_length
        texts = selected
    ranked = _rank_texts(request.query, texts)
    offset = request.offset
    limit = request.limit

    blocks: List[ContextPackBlock] = []
    remaining_chars = request.maximum_total_characters  # None means unlimited
    for idx, text in enumerate(ranked[offset : offset + limit], start=1):
        snippet = text.strip()
        if remaining_chars is not None and remaining_chars <= 0:
            break
        if remaining_chars is not None and len(snippet) > remaining_chars:
            # NOTE(review): when remaining_chars < 3 the slice end is negative
            # and trims from the tail instead — confirm budgets are always > 3.
            snippet = snippet[: remaining_chars - 3].rstrip() + "..."
        if remaining_chars is not None:
            remaining_chars -= len(snippet)
        if not snippet:
            continue
        blocks.append(
            ContextPackBlock(
                # 1-based position within the full ranked ordering.
                evidence_item_id=f"{split}-{offset + idx}",
                text=snippet,
                metadata=None,
            )
        )

    text = "\n\n".join(block.text for block in blocks)
    return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
126
+
127
+
128
def get_noaa_afd_cache_dir() -> Path:
    """Return the cache directory for NOAA AFD text fixtures."""
    # An environment override takes precedence over the repo-relative default.
    override = os.environ.get("TACTUS_NOAA_AFD_DIR")
    if override:
        return Path(override)
    return Path(__file__).resolve().parents[2] / "tests" / "fixtures" / "noaa_afd"
134
+
135
+
136
def load_noaa_afd_texts(wfo: str, limit: int | None = None) -> List[str]:
    """Load NOAA AFD text files for the given WFO code, sorted by filename."""
    base_dir = get_noaa_afd_cache_dir() / wfo.upper()
    if not base_dir.exists():
        raise FileNotFoundError(f"No NOAA AFD corpus found for WFO '{wfo}' at {base_dir}")
    # Deterministic ordering; undecodable bytes are replaced, not fatal.
    contents = [
        candidate.read_text(encoding="utf-8", errors="replace")
        for candidate in sorted(base_dir.glob("*.txt"))
    ]
    return contents if limit is None else contents[:limit]
146
+
147
+
148
def retrieve_noaa_afd(request: ContextRetrieverRequest) -> ContextPack:
    """
    Retrieve matching passages from NOAA AFD text fixtures.

    :param request: Context retriever request payload.
    :type request: ContextRetrieverRequest
    :return: Context pack derived from matching passages.
    :rtype: ContextPack
    """
    # Weather Forecast Office code; defaults to the "MFL" fixture set.
    wfo = request.metadata.get("wfo", "MFL")
    # Optional caps on how much of the corpus is considered before ranking.
    maximum_cache_total_items = request.metadata.get("maximum_cache_total_items")
    maximum_cache_total_characters = request.metadata.get("maximum_cache_total_characters")
    texts = load_noaa_afd_texts(wfo=wfo, limit=None)
    if maximum_cache_total_items is not None:
        texts = texts[: int(maximum_cache_total_items)]
    elif maximum_cache_total_characters is not None:
        # The item cap wins when both are given; otherwise keep whole texts
        # until the character budget would be exceeded (no mid-text truncation
        # at this stage).
        selected = []
        total_chars = 0
        for text in texts:
            text_length = len(text)
            if total_chars + text_length > int(maximum_cache_total_characters):
                break
            selected.append(text)
            total_chars += text_length
        texts = selected

    ranked = _rank_texts(request.query, texts)
    offset = request.offset
    limit = request.limit

    blocks: List[ContextPackBlock] = []
    remaining_chars = request.maximum_total_characters  # None means unlimited
    for idx, text in enumerate(ranked[offset : offset + limit], start=1):
        snippet = text.strip()
        if remaining_chars is not None and remaining_chars <= 0:
            break
        if remaining_chars is not None and len(snippet) > remaining_chars:
            # NOTE(review): when remaining_chars < 3 the slice end is negative
            # and trims from the tail instead — confirm budgets are always > 3.
            snippet = snippet[: remaining_chars - 3].rstrip() + "..."
        if remaining_chars is not None:
            remaining_chars -= len(snippet)
        if not snippet:
            continue
        blocks.append(
            ContextPackBlock(
                # 1-based position within the full ranked ordering.
                evidence_item_id=f"{wfo.lower()}-{offset + idx}",
                text=snippet,
                metadata=None,
            )
        )

    text = "\n\n".join(block.text for block in blocks)
    return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
200
+
201
+
202
def retrieve_biblicus_context_pack(request: ContextRetrieverRequest) -> ContextPack:
    """
    Retrieve a context pack using Biblicus retrievers.

    :param request: Context retriever request payload.
    :type request: ContextRetrieverRequest
    :return: Context pack derived from Biblicus retrieval.
    :rtype: ContextPack
    :raises ValueError: If required metadata is missing.
    """
    metadata = request.metadata or {}
    # Both spellings are accepted for the retriever id and corpus root.
    retriever_id = metadata.get("retriever_id") or metadata.get("retriever_type")
    corpus_root = metadata.get("corpus_root") or metadata.get("root")
    if not retriever_id:
        raise ValueError("Biblicus retrieval requires 'retriever_id' in metadata")
    if not corpus_root:
        raise ValueError("Biblicus retrieval requires 'corpus_root' in metadata")

    max_items = metadata.get(
        "maximum_items_per_source",
        metadata.get("max_items_per_source"),
    )
    return retrieve_context_pack(
        request=request,
        corpus=Corpus.open(corpus_root),
        retriever_id=retriever_id,
        snapshot_id=metadata.get("snapshot_id"),
        configuration_name=metadata.get("configuration_name"),
        configuration=metadata.get("configuration") or {},
        max_items_per_source=max_items,
        include_metadata=bool(metadata.get("include_metadata", False)),
        metadata_fields=metadata.get("metadata_fields"),
    )
242
+
243
+
244
def make_retriever_router(corpus_registry, retriever_registry=None) -> callable:
    """
    Build a retriever dispatcher based on corpus and retriever configuration.

    :param corpus_registry: Corpus registry used to resolve corpus metadata.
    :type corpus_registry: dict[str, Any] or None
    :param retriever_registry: Retriever registry used to resolve retrievers.
    :type retriever_registry: dict[str, Any] or None
    :return: Retriever callable that dispatches by retriever id.
    :rtype: callable
    """

    def _dispatch(request: ContextRetrieverRequest) -> ContextPack:
        meta = request.metadata
        corpus_name = meta.get("corpus")
        retriever_name = meta.get("retriever")
        retriever_id = meta.get("retriever_id") or meta.get("retriever_type")

        # Fall back to the registered retriever's own config for the id.
        if retriever_id is None and retriever_registry and retriever_name in retriever_registry:
            spec = retriever_registry[retriever_name]
            spec_config = getattr(spec, "config", {})
            if isinstance(spec_config, dict):
                retriever_id = spec_config.get("retriever_id") or spec_config.get(
                    "retriever_type"
                )

        # Built-in fixture retrievers are handled locally.
        if retriever_id == "noaa_afd":
            return retrieve_noaa_afd(request)
        if retriever_id == "wikitext2":
            return retrieve_wikitext2(request)

        if retriever_id is None:
            target = retriever_name or corpus_name or "<unknown>"
            raise ValueError(f"Missing retriever_id for retriever '{target}'")

        # Everything else is delegated to Biblicus.
        return retrieve_biblicus_context_pack(request)

    return _dispatch
282
+
283
+
284
def _rank_texts(query: str, texts: Iterable[str]) -> List[str]:
    """Order texts by descending keyword overlap with the query.

    Texts with zero overlap are dropped unless nothing matches (or the query
    has no tokens), in which case the original ordering is returned intact.
    """
    query_terms = _tokenize(query)
    if not query_terms:
        return list(texts)
    scored = []
    for candidate in texts:
        candidate_terms = _tokenize(candidate)
        overlap = sum(candidate_terms.count(term) for term in query_terms)
        scored.append((overlap, candidate))
    # Stable sort: equal scores keep their original relative order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    matching = [candidate for overlap, candidate in scored if overlap > 0]
    return matching or list(texts)


def _tokenize(text: str) -> List[str]:
    """Lowercase a string and split it into alphanumeric word tokens."""
    return re.findall(r"[a-zA-Z0-9]+", text.lower())
301
+
302
+
303
def _download_file(url: str, target: Path) -> None:
    """Download *url* to *target*.

    Streams the response in chunks instead of buffering the whole payload in
    memory, and writes to a temporary ``.part`` sibling first so an
    interrupted download never leaves a truncated file at *target*.

    :param url: Source URL to fetch.
    :param target: Destination path for the completed download.
    """
    tmp_path = target.with_suffix(target.suffix + ".part")
    try:
        with urllib.request.urlopen(url) as response, tmp_path.open("wb") as handle:
            # 1 MiB chunks keep memory bounded for large files.
            for chunk in iter(lambda: response.read(1024 * 1024), b""):
                handle.write(chunk)
        # Atomic-on-POSIX move into place only after a complete download.
        tmp_path.replace(target)
    finally:
        # Remove any partial file left behind by a failure.
        if tmp_path.exists():
            tmp_path.unlink()
307
+
308
+
309
def _sha256_matches(path: Path, expected: str) -> bool:
    """Return True when the file at *path* exists and its SHA-256 equals *expected*."""
    if not path.exists():
        return False
    digest = hashlib.sha256()
    # Hash in fixed-size chunks so large files never load fully into memory.
    with path.open("rb") as stream:
        while chunk := stream.read(8192):
            digest.update(chunk)
    return digest.hexdigest() == expected
@@ -0,0 +1,30 @@
1
"""Static metadata for retriever-supported tasks."""

from __future__ import annotations

from typing import Optional

# Maps each built-in retriever identifier to the task names it supports.
RETRIEVER_TASKS: dict[str, set[str]] = {
    "tf-vector": {"index"},
    "sqlite-full-text-search": {"index"},
    "embedding-index-inmemory": {"index"},
    "embedding-index-file": {"index"},
}


def resolve_retriever_id(config: Optional[dict]) -> Optional[str]:
    """Resolve the retriever identifier from a retriever config dict.

    Prefers "retriever_id" over "retriever_type"; blank or non-string values
    are ignored. Returns None when no usable identifier is found.
    """
    if not isinstance(config, dict):
        return None
    for key in ("retriever_id", "retriever_type"):
        candidate = config.get(key)
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return None


def supported_retriever_tasks(retriever_id: Optional[str]) -> set[str]:
    """Return a fresh set of task names supported by *retriever_id* (empty if unknown)."""
    return set(RETRIEVER_TASKS.get(retriever_id, ())) if retriever_id else set()