tactus 0.36.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. tactus/__init__.py +1 -1
  2. tactus/adapters/channels/base.py +22 -2
  3. tactus/adapters/channels/broker.py +1 -0
  4. tactus/adapters/channels/host.py +3 -1
  5. tactus/adapters/channels/ipc.py +18 -3
  6. tactus/adapters/channels/sse.py +2 -0
  7. tactus/adapters/mcp_manager.py +24 -7
  8. tactus/backends/http_backend.py +2 -2
  9. tactus/backends/pytorch_backend.py +2 -2
  10. tactus/broker/client.py +3 -3
  11. tactus/broker/server.py +17 -5
  12. tactus/cli/app.py +212 -57
  13. tactus/core/compaction.py +17 -0
  14. tactus/core/context_assembler.py +73 -0
  15. tactus/core/context_models.py +41 -0
  16. tactus/core/dsl_stubs.py +560 -20
  17. tactus/core/exceptions.py +8 -0
  18. tactus/core/execution_context.py +24 -24
  19. tactus/core/message_history_manager.py +2 -2
  20. tactus/core/mocking.py +12 -0
  21. tactus/core/output_validator.py +6 -6
  22. tactus/core/registry.py +171 -29
  23. tactus/core/retrieval.py +317 -0
  24. tactus/core/retriever_tasks.py +30 -0
  25. tactus/core/runtime.py +431 -117
  26. tactus/dspy/agent.py +143 -82
  27. tactus/dspy/broker_lm.py +13 -7
  28. tactus/dspy/config.py +23 -4
  29. tactus/dspy/module.py +12 -1
  30. tactus/ide/coding_assistant.py +2 -2
  31. tactus/primitives/handles.py +79 -7
  32. tactus/primitives/model.py +1 -1
  33. tactus/primitives/procedure.py +1 -1
  34. tactus/primitives/state.py +2 -2
  35. tactus/sandbox/config.py +1 -1
  36. tactus/sandbox/container_runner.py +13 -6
  37. tactus/sandbox/entrypoint.py +51 -8
  38. tactus/sandbox/protocol.py +5 -0
  39. tactus/stdlib/README.md +10 -1
  40. tactus/stdlib/biblicus/__init__.py +3 -0
  41. tactus/stdlib/biblicus/text.py +189 -0
  42. tactus/stdlib/tac/biblicus/text.tac +32 -0
  43. tactus/stdlib/tac/tactus/biblicus.spec.tac +179 -0
  44. tactus/stdlib/tac/tactus/corpora/base.tac +42 -0
  45. tactus/stdlib/tac/tactus/corpora/filesystem.tac +5 -0
  46. tactus/stdlib/tac/tactus/retrievers/base.tac +37 -0
  47. tactus/stdlib/tac/tactus/retrievers/embedding_index_file.tac +6 -0
  48. tactus/stdlib/tac/tactus/retrievers/embedding_index_inmemory.tac +6 -0
  49. tactus/stdlib/tac/tactus/retrievers/index.md +137 -0
  50. tactus/stdlib/tac/tactus/retrievers/init.tac +11 -0
  51. tactus/stdlib/tac/tactus/retrievers/sqlite_full_text_search.tac +6 -0
  52. tactus/stdlib/tac/tactus/retrievers/tf_vector.tac +6 -0
  53. tactus/testing/behave_integration.py +2 -0
  54. tactus/testing/context.py +10 -6
  55. tactus/testing/evaluation_runner.py +5 -5
  56. tactus/testing/steps/builtin.py +2 -2
  57. tactus/testing/test_runner.py +6 -4
  58. tactus/utils/asyncio_helpers.py +2 -1
  59. tactus/validation/semantic_visitor.py +357 -6
  60. tactus/validation/validator.py +142 -2
  61. {tactus-0.36.0.dist-info → tactus-0.38.0.dist-info}/METADATA +9 -6
  62. {tactus-0.36.0.dist-info → tactus-0.38.0.dist-info}/RECORD +65 -47
  63. {tactus-0.36.0.dist-info → tactus-0.38.0.dist-info}/WHEEL +0 -0
  64. {tactus-0.36.0.dist-info → tactus-0.38.0.dist-info}/entry_points.txt +0 -0
  65. {tactus-0.36.0.dist-info → tactus-0.38.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,317 @@
1
+ """Deterministic retrieval utilities for Context packs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import os
7
+ import re
8
+ import urllib.request
9
+ from pathlib import Path
10
+ from typing import Iterable, List
11
+
12
+ import pyarrow.parquet as pq
13
+
14
+ from biblicus.context import ContextPack, ContextPackBlock
15
+ from biblicus.context_engine import ContextRetrieverRequest, retrieve_context_pack
16
+ from biblicus.corpus import Corpus
17
+
18
+
19
# Download metadata for each Wikitext-2 raw split: the parquet file name and the
# SHA-256 digest the file is expected to have. Every file name follows the same
# "<split>-00000-of-00001.parquet" pattern, so only the digests are spelled out.
_WIKITEXT2_FILES = {
    split: {"filename": f"{split}-00000-of-00001.parquet", "sha256": digest}
    for split, digest in (
        ("train", "e83889baabc497075506f91975be5fac0d45c5290b6b20582c8cd1e853d0c9f7"),
        ("validation", "204929b7ff9d6184953f867dedb860e40aa69c078fc1e54b3baaa8fb28511c4c"),
        ("test", "5f1bea067869d04849c0f975a2b29c4ff47d867f484f5010ea5e861eab246d91"),
    )
}
33
+
34
+
35
def get_wikitext2_cache_dir() -> Path:
    """
    Return the directory that holds the Wikitext-2 raw parquet files.

    :return: The path given by the ``TACTUS_WIKITEXT2_CACHE_DIR`` environment
        variable when it is set to a non-empty value; otherwise
        ``tests/fixtures/wikitext-2-raw-v1`` under the directory two levels
        above the one containing this module.
    :rtype: Path
    """
    override = os.environ.get("TACTUS_WIKITEXT2_CACHE_DIR")
    if not override:
        anchor = Path(__file__).resolve().parents[2]
        return anchor.joinpath("tests", "fixtures", "wikitext-2-raw-v1")
    return Path(override)
41
+
42
+
43
def ensure_wikitext2_raw(cache_dir: Path | None = None) -> Path:
    """
    Make sure every Wikitext-2 raw parquet file is present and verified.

    Each file listed in ``_WIKITEXT2_FILES`` is kept when it already exists in
    the cache directory with the expected SHA-256 digest; otherwise it is
    downloaded and its digest is checked again.

    :param cache_dir: Directory to populate. When omitted (or falsy) the result
        of :func:`get_wikitext2_cache_dir` is used. Created if missing.
    :type cache_dir: Path or None
    :return: The directory containing the verified parquet files.
    :rtype: Path
    :raises RuntimeError: If a freshly downloaded file fails its checksum.
    """
    destination = cache_dir or get_wikitext2_cache_dir()
    destination.mkdir(parents=True, exist_ok=True)
    remote_root = (
        "https://huggingface.co/datasets/Salesforce/wikitext"
        "/resolve/main/wikitext-2-raw-v1"
    )

    for split_name, spec in _WIKITEXT2_FILES.items():
        filename = spec["filename"]
        checksum = spec["sha256"]
        local_copy = destination / filename

        already_cached = local_copy.exists() and _sha256_matches(local_copy, checksum)
        if not already_cached:
            _download_file(f"{remote_root}/{filename}", local_copy)
            # Verify what was just fetched before trusting it.
            if not _sha256_matches(local_copy, checksum):
                raise RuntimeError(f"Checksum mismatch for {split_name} parquet file")

    return destination
60
+
61
+
62
def load_wikitext2_texts(split: str, limit: int | None = None) -> List[str]:
    """
    Load the non-empty ``text`` values of one Wikitext-2 raw split.

    The parquet files are fetched/verified through :func:`ensure_wikitext2_raw`
    before reading.

    :param split: Split name; must be a key of ``_WIKITEXT2_FILES``.
    :type split: str
    :param limit: When given, only the first ``limit`` non-empty texts are returned.
    :type limit: int or None
    :return: Text values in file order, with falsy entries dropped.
    :rtype: list[str]
    :raises ValueError: If ``split`` is not a known split name.
    """
    if split not in _WIKITEXT2_FILES:
        raise ValueError(f"Unknown Wikitext2 split: {split}")

    parquet_file = ensure_wikitext2_raw() / _WIKITEXT2_FILES[split]["filename"]
    text_column = pq.read_table(parquet_file, columns=["text"]).column("text")
    # filter(None, ...) drops empty strings and nulls, like the truthiness test.
    non_empty = list(filter(None, text_column.to_pylist()))
    return non_empty if limit is None else non_empty[:limit]
73
+
74
+
75
def retrieve_wikitext2(request: ContextRetrieverRequest) -> ContextPack:
    """
    Retrieve matching passages from Wikitext-2 raw.

    Reads these optional keys from ``request.metadata``: ``split`` (default
    ``"train"``), ``maximum_cache_total_items`` and
    ``maximum_cache_total_characters``. The item cap takes precedence; the
    character cap is only applied when no item cap is given. The surviving
    texts are ranked against ``request.query`` and the window
    ``[offset, offset + limit)`` is turned into blocks, trimmed to
    ``request.maximum_total_characters`` when that is set.

    :param request: Context retriever request payload.
    :type request: ContextRetrieverRequest
    :return: Context pack derived from matching passages.
    :rtype: ContextPack
    """
    # Tolerate a request without metadata, as retrieve_biblicus_context_pack does.
    metadata = request.metadata or {}
    split = metadata.get("split", "train")
    maximum_cache_total_items = metadata.get("maximum_cache_total_items")
    maximum_cache_total_characters = metadata.get("maximum_cache_total_characters")

    texts = load_wikitext2_texts(split=split, limit=None)
    if maximum_cache_total_items is not None:
        texts = texts[: int(maximum_cache_total_items)]
    elif maximum_cache_total_characters is not None:
        character_budget = int(maximum_cache_total_characters)
        selected = []
        total_chars = 0
        for text in texts:
            text_length = len(text)
            # Stop at the first text that would overflow the candidate budget.
            if total_chars + text_length > character_budget:
                break
            selected.append(text)
            total_chars += text_length
        texts = selected

    ranked = _rank_texts(request.query, texts)
    offset = request.offset
    limit = request.limit

    blocks: List[ContextPackBlock] = []
    remaining_chars = request.maximum_total_characters
    for idx, text in enumerate(ranked[offset : offset + limit], start=1):
        if remaining_chars is not None and remaining_chars <= 0:
            break
        snippet = text.strip()
        if remaining_chars is not None and len(snippet) > remaining_chars:
            if remaining_chars >= 3:
                snippet = snippet[: remaining_chars - 3].rstrip() + "..."
            else:
                # Fewer than 3 characters left: an ellipsis alone would exceed
                # the budget (and a negative slice bound would keep most of the
                # text), so hard-cut to what still fits.
                snippet = snippet[:remaining_chars]
        if remaining_chars is not None:
            remaining_chars -= len(snippet)
        if not snippet:
            continue
        blocks.append(
            ContextPackBlock(
                evidence_item_id=f"{split}-{offset + idx}",
                text=snippet,
                metadata=None,
            )
        )

    text = "\n\n".join(block.text for block in blocks)
    return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
126
+
127
+
128
def get_noaa_afd_cache_dir() -> Path:
    """
    Return the cache directory for NOAA AFD text fixtures.

    :return: The path given by the ``TACTUS_NOAA_AFD_DIR`` environment variable
        when it is set to a non-empty value; otherwise ``tests/fixtures/noaa_afd``
        under the directory two levels above the one containing this module.
    :rtype: Path
    """
    env_path = os.environ.get("TACTUS_NOAA_AFD_DIR")
    if env_path:
        return Path(env_path)
    return Path(__file__).resolve().parents[2] / "tests" / "fixtures" / "noaa_afd"


def load_noaa_afd_texts(wfo: str, limit: int | None = None) -> List[str]:
    """
    Load NOAA AFD text files for the given WFO code.

    Files are the ``*.txt`` entries of ``<cache dir>/<WFO upper-cased>``, taken
    in sorted path order and decoded as UTF-8 with undecodable bytes replaced.

    :param wfo: WFO code; upper-cased to form the directory name.
    :type wfo: str
    :param limit: When given, only the first ``limit`` files (in sorted order)
        are read and returned.
    :type limit: int or None
    :return: File contents in sorted path order.
    :rtype: list[str]
    :raises FileNotFoundError: If the directory for ``wfo`` does not exist.
    """
    # NOTE(review): ``wfo`` is joined into the path unvalidated; confirm callers
    # never pass untrusted values containing path separators.
    base_dir = get_noaa_afd_cache_dir() / wfo.upper()
    if not base_dir.exists():
        raise FileNotFoundError(f"No NOAA AFD corpus found for WFO '{wfo}' at {base_dir}")
    files = sorted(base_dir.glob("*.txt"))
    if limit is not None:
        # Apply the limit before reading so files beyond it are never opened.
        files = files[:limit]
    return [path.read_text(encoding="utf-8", errors="replace") for path in files]
146
+
147
+
148
def retrieve_noaa_afd(request: ContextRetrieverRequest) -> ContextPack:
    """
    Retrieve matching passages from NOAA AFD text fixtures.

    Reads these optional keys from ``request.metadata``: ``wfo`` (default
    ``"MFL"``), ``maximum_cache_total_items`` and
    ``maximum_cache_total_characters``. The item cap takes precedence; the
    character cap is only applied when no item cap is given. The surviving
    texts are ranked against ``request.query`` and the window
    ``[offset, offset + limit)`` is turned into blocks, trimmed to
    ``request.maximum_total_characters`` when that is set.

    :param request: Context retriever request payload.
    :type request: ContextRetrieverRequest
    :return: Context pack derived from matching passages.
    :rtype: ContextPack
    """
    # Tolerate a request without metadata, as retrieve_biblicus_context_pack does.
    metadata = request.metadata or {}
    wfo = metadata.get("wfo", "MFL")
    maximum_cache_total_items = metadata.get("maximum_cache_total_items")
    maximum_cache_total_characters = metadata.get("maximum_cache_total_characters")

    texts = load_noaa_afd_texts(wfo=wfo, limit=None)
    if maximum_cache_total_items is not None:
        texts = texts[: int(maximum_cache_total_items)]
    elif maximum_cache_total_characters is not None:
        character_budget = int(maximum_cache_total_characters)
        selected = []
        total_chars = 0
        for text in texts:
            text_length = len(text)
            # Stop at the first text that would overflow the candidate budget.
            if total_chars + text_length > character_budget:
                break
            selected.append(text)
            total_chars += text_length
        texts = selected

    ranked = _rank_texts(request.query, texts)
    offset = request.offset
    limit = request.limit

    blocks: List[ContextPackBlock] = []
    remaining_chars = request.maximum_total_characters
    for idx, text in enumerate(ranked[offset : offset + limit], start=1):
        if remaining_chars is not None and remaining_chars <= 0:
            break
        snippet = text.strip()
        if remaining_chars is not None and len(snippet) > remaining_chars:
            if remaining_chars >= 3:
                snippet = snippet[: remaining_chars - 3].rstrip() + "..."
            else:
                # Fewer than 3 characters left: an ellipsis alone would exceed
                # the budget (and a negative slice bound would keep most of the
                # text), so hard-cut to what still fits.
                snippet = snippet[:remaining_chars]
        if remaining_chars is not None:
            remaining_chars -= len(snippet)
        if not snippet:
            continue
        blocks.append(
            ContextPackBlock(
                evidence_item_id=f"{wfo.lower()}-{offset + idx}",
                text=snippet,
                metadata=None,
            )
        )

    text = "\n\n".join(block.text for block in blocks)
    return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
200
+
201
+
202
def retrieve_biblicus_context_pack(request: ContextRetrieverRequest) -> ContextPack:
    """
    Retrieve a context pack using Biblicus retrievers.

    All options come from ``request.metadata`` (treated as empty when falsy):
    ``retriever_id`` (or ``retriever_type``) and ``corpus_root`` (or ``root``)
    are required; ``snapshot_id``, ``configuration_name``, ``configuration``,
    ``maximum_items_per_source`` (or ``max_items_per_source``),
    ``include_metadata`` and ``metadata_fields`` are optional and forwarded to
    ``retrieve_context_pack``.

    :param request: Context retriever request payload.
    :type request: ContextRetrieverRequest
    :return: Context pack derived from Biblicus retrieval.
    :rtype: ContextPack
    :raises ValueError: If required metadata is missing.
    """
    options = request.metadata or {}

    retriever_id = options.get("retriever_id") or options.get("retriever_type")
    if not retriever_id:
        raise ValueError("Biblicus retrieval requires 'retriever_id' in metadata")

    corpus_root = options.get("corpus_root") or options.get("root")
    if not corpus_root:
        raise ValueError("Biblicus retrieval requires 'corpus_root' in metadata")

    # The long key wins whenever it is present, even with a None value; the
    # short alias is consulted only when the long key is absent.
    if "maximum_items_per_source" in options:
        per_source_cap = options["maximum_items_per_source"]
    else:
        per_source_cap = options.get("max_items_per_source")

    return retrieve_context_pack(
        request=request,
        corpus=Corpus.open(corpus_root),
        retriever_id=retriever_id,
        snapshot_id=options.get("snapshot_id"),
        configuration_name=options.get("configuration_name"),
        configuration=options.get("configuration") or {},
        max_items_per_source=per_source_cap,
        include_metadata=bool(options.get("include_metadata", False)),
        metadata_fields=options.get("metadata_fields"),
    )
242
+
243
+
244
def make_retriever_router(corpus_registry, retriever_registry=None) -> callable:
    """
    Build a retriever dispatcher based on corpus and retriever configuration.

    The returned callable resolves a retriever id from the request metadata
    (``retriever_id``, then ``retriever_type``). When neither is set it falls
    back to the ``config`` dict of the ``retriever_registry`` entry named by the
    ``retriever`` metadata key. Ids ``"noaa_afd"`` and ``"wikitext2"`` go to the
    built-in retrievers; any other id is handled by
    ``retrieve_biblicus_context_pack``.

    :param corpus_registry: Corpus registry. Accepted for interface
        compatibility; the router does not currently read it.
    :type corpus_registry: dict[str, Any] or None
    :param retriever_registry: Retriever registry used to resolve retrievers.
    :type retriever_registry: dict[str, Any] or None
    :return: Retriever callable that dispatches by retriever id.
    :rtype: callable
    """

    def _route(request: ContextRetrieverRequest) -> ContextPack:
        """Dispatch ``request``; raise ``ValueError`` when no retriever id resolves."""
        # Tolerate a request without metadata, as retrieve_biblicus_context_pack does.
        metadata = request.metadata or {}
        corpus_name = metadata.get("corpus")
        retriever_name = metadata.get("retriever")
        retriever_id = metadata.get("retriever_id") or metadata.get("retriever_type")

        if retriever_id is None and retriever_registry and retriever_name in retriever_registry:
            retriever_config = getattr(retriever_registry[retriever_name], "config", {})
            if isinstance(retriever_config, dict):
                retriever_id = retriever_config.get("retriever_id") or retriever_config.get(
                    "retriever_type"
                )

        if retriever_id == "noaa_afd":
            return retrieve_noaa_afd(request)
        if retriever_id == "wikitext2":
            return retrieve_wikitext2(request)

        if retriever_id is None:
            missing_target = retriever_name or corpus_name or "<unknown>"
            raise ValueError(f"Missing retriever_id for retriever '{missing_target}'")

        # NOTE(review): retrieve_biblicus_context_pack reads the retriever id from
        # request.metadata only, so an id resolved from the registry above is not
        # forwarded to it. Confirm callers also place it in the request metadata.
        return retrieve_biblicus_context_pack(request)

    return _route
282
+
283
+
284
def _rank_texts(query: str, texts: Iterable[str]) -> List[str]:
    """
    Rank texts by keyword overlap with the query.

    A text's score is the total number of occurrences of the query tokens among
    its own tokens (a token repeated in the query counts once per repetition).
    Texts scoring zero are dropped; ties keep their input order.

    :param query: Free-text query; tokenized with :func:`_tokenize`.
    :type query: str
    :param texts: Candidate texts. Any iterable is accepted, including one-shot
        iterators.
    :type texts: Iterable[str]
    :return: Matching texts, best score first. When the query has no tokens or
        nothing matches, all texts in their input order.
    :rtype: list[str]
    """
    from collections import Counter

    # Materialize once: the fallback below needs the texts a second time, which
    # would be empty if a generator had already been consumed by the scoring loop.
    candidates = list(texts)
    query_terms = _tokenize(query)
    if not query_terms:
        return candidates

    scored = []
    for text in candidates:
        term_counts = Counter(_tokenize(text))
        score = sum(term_counts[term] for term in query_terms)
        if score > 0:
            scored.append((score, text))
    # list.sort is stable, also with reverse=True, so equal scores keep input order.
    scored.sort(key=lambda item: item[0], reverse=True)
    return [text for _, text in scored] or candidates


def _tokenize(text: str) -> List[str]:
    """
    Tokenize text to lowercase word tokens.

    :param text: Text to split.
    :type text: str
    :return: Maximal runs of ASCII letters and digits from the lower-cased text.
    :rtype: list[str]
    """
    return re.findall(r"[a-zA-Z0-9]+", text.lower())
301
+
302
+
303
def _download_file(url: str, target: Path, timeout: float = 60.0) -> None:
    """
    Download a file to the target path.

    The response is streamed to a ``<name>.part`` staging file beside ``target``
    and moved into place only after the transfer completes, so a failed download
    never leaves a truncated file at ``target``.

    :param url: URL to fetch.
    :type url: str
    :param target: Destination path; replaced if it already exists.
    :type target: Path
    :param timeout: Timeout in seconds passed to ``urllib.request.urlopen`` for
        blocking network operations.
    :type timeout: float
    :raises OSError: If the URL cannot be opened or the file cannot be written
        (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    import shutil

    staging = target.with_name(target.name + ".part")
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            with staging.open("wb") as handle:
                # Copy in chunks instead of buffering the whole body in memory.
                shutil.copyfileobj(response, handle)
        os.replace(staging, target)
    finally:
        # After a successful replace the staging path is gone; anything still
        # there is a partial download from a failed attempt.
        if staging.exists():
            staging.unlink()
307
+
308
+
309
def _sha256_matches(path: Path, expected: str) -> bool:
    """
    Check the SHA-256 checksum of a file.

    :param path: File to hash.
    :type path: Path
    :param expected: Expected hex digest; surrounding whitespace and letter case
        are ignored.
    :type expected: str
    :return: ``True`` when the file exists and its digest equals ``expected``;
        ``False`` when the file is missing or the digest differs.
    :rtype: bool
    """
    hasher = hashlib.sha256()
    try:
        # Open directly rather than testing exists() first, so a file removed
        # between the check and the open cannot raise.
        with path.open("rb") as handle:
            for chunk in iter(lambda: handle.read(65536), b""):
                hasher.update(chunk)
    except FileNotFoundError:
        return False
    # hexdigest() is lowercase, so normalize the expected value to match.
    return hasher.hexdigest() == expected.strip().lower()
@@ -0,0 +1,30 @@
1
+ """Static metadata for retriever-supported tasks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
# Task names supported by each known retriever id. Every entry gets its own set
# object; all currently listed retrievers support only the "index" task.
RETRIEVER_TASKS: dict[str, set[str]] = {
    retriever_id: {"index"}
    for retriever_id in (
        "tf-vector",
        "sqlite-full-text-search",
        "embedding-index-inmemory",
        "embedding-index-file",
    )
}
13
+
14
+
15
def resolve_retriever_id(config: Optional[dict]) -> Optional[str]:
    """
    Resolve the retriever identifier from a retriever config dict.

    ``retriever_id`` is consulted first, then ``retriever_type``. A value counts
    only when it is a string with non-whitespace content.

    :param config: Retriever configuration; anything that is not a dict yields
        ``None``.
    :return: The identifier with surrounding whitespace removed, or ``None``
        when no usable value is present.
    """
    if not isinstance(config, dict):
        return None
    for key in ("retriever_id", "retriever_type"):
        value = config.get(key)
        if not isinstance(value, str):
            continue
        # Return the stripped form that was validated, so a padded id still
        # matches exact-key lookups such as RETRIEVER_TASKS.
        identifier = value.strip()
        if identifier:
            return identifier
    return None
24
+
25
+
26
def supported_retriever_tasks(retriever_id: Optional[str]) -> set[str]:
    """
    Return the task names supported by a retriever identifier.

    :param retriever_id: Retriever identifier, looked up in ``RETRIEVER_TASKS``.
    :return: A new set of task names; empty when the identifier is falsy or not
        listed in ``RETRIEVER_TASKS``.
    """
    if retriever_id:
        # Build a fresh set so callers cannot mutate the shared registry entry.
        return set(RETRIEVER_TASKS.get(retriever_id, ()))
    return set()