websearch-kit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. websearch_kit/__init__.py +100 -0
  2. websearch_kit/_version.py +3 -0
  3. websearch_kit/assembly/__init__.py +33 -0
  4. websearch_kit/assembly/citations.py +70 -0
  5. websearch_kit/assembly/context_builder.py +90 -0
  6. websearch_kit/caching/__init__.py +89 -0
  7. websearch_kit/caching/keys.py +93 -0
  8. websearch_kit/caching/memory.py +121 -0
  9. websearch_kit/caching/sqlite_cache.py +244 -0
  10. websearch_kit/config.py +220 -0
  11. websearch_kit/errors.py +250 -0
  12. websearch_kit/expansion/__init__.py +104 -0
  13. websearch_kit/expansion/callback.py +89 -0
  14. websearch_kit/expansion/llm.py +153 -0
  15. websearch_kit/expansion/noop.py +45 -0
  16. websearch_kit/expansion/parsing.py +120 -0
  17. websearch_kit/extraction/__init__.py +24 -0
  18. websearch_kit/extraction/chain.py +179 -0
  19. websearch_kit/extraction/quality.py +184 -0
  20. websearch_kit/extraction/readability_extractor.py +66 -0
  21. websearch_kit/extraction/sanitize_text.py +146 -0
  22. websearch_kit/extraction/trafilatura_extractor.py +181 -0
  23. websearch_kit/extraction/types.py +58 -0
  24. websearch_kit/fetching/__init__.py +17 -0
  25. websearch_kit/fetching/fetcher.py +595 -0
  26. websearch_kit/fetching/policy.py +122 -0
  27. websearch_kit/fetching/robots.py +165 -0
  28. websearch_kit/fetching/user_agents.py +78 -0
  29. websearch_kit/grammar.py +178 -0
  30. websearch_kit/kit.py +487 -0
  31. websearch_kit/mcp/__init__.py +39 -0
  32. websearch_kit/mcp/__main__.py +61 -0
  33. websearch_kit/mcp/config_cli.py +150 -0
  34. websearch_kit/mcp/progress.py +123 -0
  35. websearch_kit/mcp/server.py +264 -0
  36. websearch_kit/mcp/tools.py +376 -0
  37. websearch_kit/models.py +291 -0
  38. websearch_kit/observability/__init__.py +21 -0
  39. websearch_kit/observability/events.py +113 -0
  40. websearch_kit/observability/logging.py +97 -0
  41. websearch_kit/owui/__init__.py +21 -0
  42. websearch_kit/owui/_compat.py +275 -0
  43. websearch_kit/owui/filter_adapter.py +604 -0
  44. websearch_kit/pipeline.py +985 -0
  45. websearch_kit/prompts.py +158 -0
  46. websearch_kit/protocols.py +116 -0
  47. websearch_kit/providers/__init__.py +149 -0
  48. websearch_kit/providers/base.py +252 -0
  49. websearch_kit/providers/brave.py +171 -0
  50. websearch_kit/providers/ddgs.py +153 -0
  51. websearch_kit/providers/exa.py +156 -0
  52. websearch_kit/providers/owui.py +183 -0
  53. websearch_kit/providers/searxng.py +167 -0
  54. websearch_kit/providers/serper.py +140 -0
  55. websearch_kit/providers/tavily.py +141 -0
  56. websearch_kit/py.typed +0 -0
  57. websearch_kit/ranking/__init__.py +28 -0
  58. websearch_kit/ranking/bm25.py +109 -0
  59. websearch_kit/ranking/budget.py +140 -0
  60. websearch_kit/resilience/__init__.py +24 -0
  61. websearch_kit/resilience/circuit.py +166 -0
  62. websearch_kit/resilience/deadline.py +88 -0
  63. websearch_kit/resilience/fallback.py +265 -0
  64. websearch_kit/resilience/health.py +141 -0
  65. websearch_kit/resilience/retry.py +108 -0
  66. websearch_kit/run.py +236 -0
  67. websearch_kit/security/__init__.py +15 -0
  68. websearch_kit/security/ranges.py +129 -0
  69. websearch_kit/security/sanitize.py +97 -0
  70. websearch_kit/security/url_guard.py +296 -0
  71. websearch_kit-0.1.0.dist-info/METADATA +190 -0
  72. websearch_kit-0.1.0.dist-info/RECORD +75 -0
  73. websearch_kit-0.1.0.dist-info/WHEEL +4 -0
  74. websearch_kit-0.1.0.dist-info/entry_points.txt +2 -0
  75. websearch_kit-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,100 @@
1
+ """websearch-kit — web search, fetch, and research pipeline for LLMs.
2
+
3
+ Public API surface (everything re-exported here is SemVer-protected; see
4
+ VERSIONING.md). Usable three ways:
5
+
6
+ * SDK: ``from websearch_kit import SearchKit``
7
+ * MCP server: ``websearch-kit-mcp`` (requires the ``[mcp]`` extra)
8
+ * Open WebUI: the single-file filter under ``adapters/owui/``
9
+ """
10
+
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from ._version import __version__
14
+ from .config import WebSearchConfig
15
+ from .errors import (
16
+ CacheError,
17
+ ConfigError,
18
+ DeadlineExceededError,
19
+ ExpansionError,
20
+ ExtractionError,
21
+ FetchError,
22
+ GuardError,
23
+ MissingDependencyError,
24
+ ProviderError,
25
+ RobotsBlockedError,
26
+ SSRFBlockedError,
27
+ WebSearchKitError,
28
+ )
29
+ from .models import (
30
+ Degradation,
31
+ FetchOutcome,
32
+ PageContent,
33
+ ProgressEvent,
34
+ ResearchReport,
35
+ RunStats,
36
+ SearchResult,
37
+ Source,
38
+ Stage,
39
+ SystemHealth,
40
+ )
41
+ from .protocols import (
42
+ Cache,
43
+ CallbackSink,
44
+ ProgressSink,
45
+ ProviderCapabilities,
46
+ QueryExpander,
47
+ SearchProvider,
48
+ )
49
+
50
+ if TYPE_CHECKING: # pragma: no cover - import surface for type checkers only
51
+ from .kit import SearchKit, SyncSearchKit
52
+
53
+ __all__ = [
54
+ "Cache",
55
+ "CacheError",
56
+ "CallbackSink",
57
+ "ConfigError",
58
+ "DeadlineExceededError",
59
+ "Degradation",
60
+ "ExpansionError",
61
+ "ExtractionError",
62
+ "FetchError",
63
+ "FetchOutcome",
64
+ "GuardError",
65
+ "MissingDependencyError",
66
+ "PageContent",
67
+ "ProgressEvent",
68
+ "ProgressSink",
69
+ "ProviderCapabilities",
70
+ "ProviderError",
71
+ "QueryExpander",
72
+ "ResearchReport",
73
+ "RobotsBlockedError",
74
+ "RunStats",
75
+ "SSRFBlockedError",
76
+ "SearchKit",
77
+ "SearchProvider",
78
+ "SearchResult",
79
+ "Source",
80
+ "Stage",
81
+ "SyncSearchKit",
82
+ "SystemHealth",
83
+ "WebSearchConfig",
84
+ "WebSearchKitError",
85
+ "__version__",
86
+ ]
87
+
88
+ #: Engine names resolved lazily: importing ``kit`` pulls the extraction chain,
89
+ #: whose package import loads trafilatura (heavyweight). Deferring keeps
90
+ #: ``import websearch_kit`` cheap for dep-light consumers (models, config,
91
+ #: sanitize) while ``from websearch_kit import SearchKit`` still just works.
92
+ _LAZY_EXPORTS = frozenset({"SearchKit", "SyncSearchKit"})
93
+
94
+
95
def __getattr__(name: str) -> Any:
    """Resolve the lazily exported engine names on first attribute access.

    This is the module-level ``__getattr__`` hook (PEP 562): Python calls it
    only for names that are not already present in the package namespace.
    Names listed in ``_LAZY_EXPORTS`` are fetched from the ``kit`` submodule,
    which is imported here instead of at package import time. Every other
    name gets the standard ``AttributeError``.
    """
    # Guard clause first: unknown names must fail exactly like a plain module.
    if name not in _LAZY_EXPORTS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred import: only paid for when an engine class is actually requested.
    from . import kit

    return getattr(kit, name)
@@ -0,0 +1,3 @@
1
+ """Single source of version truth (read by hatchling and exported from __init__)."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,33 @@
1
+ """Context assembly: numbered source blocks and their 1:1 citation list.
2
+
3
+ WHY a package: the pipeline's final products — the ``[N]``-numbered context
4
+ string handed to an LLM and the ``Source`` citations a UI maps those markers
5
+ back to — must be generated from the same records in the same order or the
6
+ model cites things the UI cannot resolve. ``citations`` owns the numbering,
7
+ ``context_builder`` owns the (reference-verbatim) block formats, and both are
8
+ pure functions over :class:`SourceDraft` so the lockstep invariant is golden-
9
+ testable with no pipeline machinery.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from .citations import SourceDraft, number_sources
15
+ from .context_builder import (
16
+ SNIPPET_POOL_HEADER,
17
+ TRUNCATION_SUFFIX,
18
+ build_context,
19
+ render_pool_block,
20
+ render_source_block,
21
+ truncate_to,
22
+ )
23
+
24
+ __all__ = [
25
+ "SNIPPET_POOL_HEADER",
26
+ "TRUNCATION_SUFFIX",
27
+ "SourceDraft",
28
+ "build_context",
29
+ "number_sources",
30
+ "render_pool_block",
31
+ "render_source_block",
32
+ "truncate_to",
33
+ ]
@@ -0,0 +1,70 @@
1
+ """Citation numbering: assembled source records -> public ``Source`` models.
2
+
3
+ WHY a dedicated module: the context block and the citation list MUST stay in
4
+ lockstep — every ``[N]`` marker the LLM can emit needs exactly one
5
+ :class:`~websearch_kit.models.Source` with the same ``n``, and the numbering
6
+ must be contiguous across the two segments (fetched sources first, then the
7
+ snippet pool). Splitting "what goes in" (the pipeline's ranked
8
+ :class:`SourceDraft` records) from "how it is numbered" (here) and "how it is
9
+ rendered" (``context_builder``) keeps that 1:1 invariant testable as a pure
10
+ function: same drafts in, same numbering out, no pipeline state involved.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from collections.abc import Sequence
16
+ from dataclasses import dataclass
17
+ from typing import Literal
18
+
19
+ from ..models import Source
20
+
21
+ __all__ = ["SourceDraft", "number_sources"]
22
+
23
+
24
+ @dataclass(slots=True)
25
+ class SourceDraft:
26
+ """One assembled source before numbering: the pipeline's working record.
27
+
28
+ Mutable on purpose — the ranking/budget stage truncates ``content`` and
29
+ assigns ``score`` in place. ``kind`` records *what the content is*
30
+ (``snippet_only`` when the search snippet substituted for a failed or
31
+ low-quality fetch), independent of *where* the draft renders (primary
32
+ block vs snippet pool) — that placement is positional, decided by which
33
+ sequence the draft is passed in.
34
+ """
35
+
36
+ title: str
37
+ url: str
38
+ snippet: str
39
+ content: str
40
+ kind: Literal["fetched", "snippet_only"]
41
+ score: float | None = None
42
+
43
+
44
def number_sources(
    primary: Sequence[SourceDraft],
    pool: Sequence[SourceDraft],
) -> list[Source]:
    """Turn the two draft segments into one contiguously numbered source list.

    Numbering is 1-based. Every draft in ``primary`` is numbered first, in the
    order given; the drafts in ``pool`` then continue the same counter without
    a gap. Each returned ``Source`` copies the draft's title, url, snippet,
    kind and score, and records the length of the draft's content as
    ``content_chars``.

    Args:
        primary: Drafts for the main segment, already in their final order.
        pool: Drafts for the snippet-pool segment that follows it.

    Returns:
        One ``Source`` per draft; the entry at index ``i`` has ``n == i + 1``.
    """
    ordered = [*primary, *pool]
    return [
        Source(
            n=position,
            title=item.title,
            url=item.url,
            snippet=item.snippet,
            kind=item.kind,
            score=item.score,
            content_chars=len(item.content),
        )
        for position, item in enumerate(ordered, start=1)
    ]
@@ -0,0 +1,90 @@
1
+ """Context-block rendering: numbered ``--- [N] Title ---`` segments for the LLM.
2
+
3
+ WHY the formats are frozen: the answer prompt (``prompts.build_answer_prompt``)
4
+ instructs the model to cite with inline ``[N]`` markers and to fall back to the
5
+ ``Summary (Snippet)`` line when ``Full Content`` is poor — so the literal field
6
+ labels in these blocks are part of the prompt contract, not cosmetics. All
7
+ three templates (fetched block, snippet-pool block, pool header) are ported
8
+ verbatim from the reference ``_process_results`` Phase C so the golden tests
9
+ pin byte-for-byte parity. Everything here is a pure string function; numbering
10
+ stays in lockstep with ``citations.number_sources`` because both iterate the
11
+ same ``(primary, pool)`` sequences in the same order.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Sequence
17
+
18
+ from .citations import SourceDraft
19
+
20
+ __all__ = [
21
+ "SNIPPET_POOL_HEADER",
22
+ "TRUNCATION_SUFFIX",
23
+ "build_context",
24
+ "render_pool_block",
25
+ "render_source_block",
26
+ "truncate_to",
27
+ ]
28
+
29
+ #: Appended when a source's content is cut to its budget allocation (verbatim
30
+ #: reference suffix — the visible, in-context signal that text was dropped).
31
+ TRUNCATION_SUFFIX = "... [TRUNCATED]"
32
+
33
+ #: Separator announcing the snippet-only pool segment (verbatim reference).
34
+ SNIPPET_POOL_HEADER = "\n--- ADDITIONAL SOURCES (snippet only, same [N] citation format) ---"
35
+
36
+
37
def render_source_block(n: int, draft: SourceDraft) -> str:
    """Render one primary-segment block for source number ``n``.

    The block is four labelled parts, each terminated by a newline: the
    ``--- [n] title ---`` header, the ``URL:`` line, the ``Summary (Snippet):``
    line and a ``Full Content:`` label followed by the draft's content on the
    next line.
    """
    segments = (
        f"--- [{n}] {draft.title} ---\n",
        f"URL: {draft.url}\n",
        f"Summary (Snippet): {draft.snippet}\n",
        f"Full Content:\n{draft.content}\n",
    )
    return "".join(segments)
45
+
46
+
47
def render_pool_block(n: int, draft: SourceDraft) -> str:
    """Render one snippet-pool block for source number ``n``.

    Only the title, url and snippet of the draft are used: the header carries
    a ``(snippet only)`` tag and the snippet is emitted on the ``Content:``
    line. The draft's ``content`` field is not read here.
    """
    header = f"--- [{n}] {draft.title} (snippet only) ---\n"
    location = f"URL: {draft.url}\n"
    body = f"Content: {draft.snippet}\n"
    return header + location + body
52
+
53
+
54
def truncate_to(content: str, alloc: int) -> tuple[str, bool]:
    """Cut ``content`` down to at most ``alloc`` characters and flag the cut.

    Args:
        content: Text to fit into the allocation.
        alloc: Maximum number of content characters to keep. Values below
            zero are treated as zero (no room for any content).

    Returns:
        ``(text, truncated)``. When the content already fits it is returned
        unchanged with ``False``. Otherwise the first ``alloc`` characters are
        kept and ``TRUNCATION_SUFFIX`` is appended *after* the cut, so the
        returned text can be longer than ``alloc`` by the length of the
        suffix; the allocation limits content, not the marker.
    """
    # Clamp first: a negative bound in ``content[:alloc]`` would count from
    # the END of the string and keep almost all of the text instead of none.
    limit = max(alloc, 0)
    if len(content) <= limit:
        return content, False
    return content[:limit] + TRUNCATION_SUFFIX, True
65
+
66
+
67
def build_context(
    primary: Sequence[SourceDraft],
    pool: Sequence[SourceDraft],
) -> str:
    """Render both draft segments into a single context string.

    Primary drafts are rendered first with ``render_source_block``, numbered
    from 1 in the order given. If ``pool`` is non-empty, ``SNIPPET_POOL_HEADER``
    follows and then one ``render_pool_block`` per pool draft, with the
    numbering carrying straight on from the last primary block. All pieces are
    joined with a single newline. Two empty inputs produce an empty string;
    interpreting that case is left to the caller.
    """
    blocks = [
        render_source_block(number, item)
        for number, item in enumerate(primary, start=1)
    ]
    if pool:
        # Capture the next number before the header is appended, since the
        # header is a list element but does not consume a citation number.
        next_number = len(blocks) + 1
        blocks.append(SNIPPET_POOL_HEADER)
        blocks.extend(
            render_pool_block(number, item)
            for number, item in enumerate(pool, start=next_number)
        )
    return "\n".join(blocks)
@@ -0,0 +1,89 @@
1
+ """Caching subsystem: backends, key builders, and the :func:`make_cache` factory.
2
+
3
+ WHY this package: the pipeline caches three things — provider searches, fetched
4
+ pages, and query expansions — each with its own TTL (see ``WebSearchConfig``).
5
+ All three flow through the single :class:`~websearch_kit.protocols.Cache`
6
+ protocol, so the choice of backend is a configuration detail resolved once, here,
7
+ by :func:`make_cache`. Centralizing construction keeps backend wiring (the sqlite
8
+ directory resolution, the fail-soft :class:`CacheGuard` wrapping) out of the
9
+ engine.
10
+
11
+ The factory always wraps a real backend in :class:`CacheGuard` so a backend fault
12
+ degrades to a cache miss instead of failing the run (no-fail-silent: the guard
13
+ logs and counts every swallowed error). ``cache_backend="none"`` returns ``None``
14
+ — the engine treats a ``None`` cache as "caching disabled" and skips lookups
15
+ entirely, which is cheaper than routing every call through a no-op object.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import time
22
+ from collections.abc import Callable
23
+ from pathlib import Path
24
+
25
+ from ..config import WebSearchConfig
26
+ from ..protocols import Cache
27
+ from .keys import content_key, expansion_key, search_key
28
+ from .memory import MemoryTTLCache
29
+ from .sqlite_cache import CacheGuard, SqliteCache
30
+
31
+ __all__ = [
32
+ "CacheGuard",
33
+ "MemoryTTLCache",
34
+ "SqliteCache",
35
+ "content_key",
36
+ "default_cache_dir",
37
+ "expansion_key",
38
+ "make_cache",
39
+ "search_key",
40
+ ]
41
+
42
+
43
def default_cache_dir() -> Path:
    """Return the default directory for the on-disk cache.

    The base directory is ``$XDG_CACHE_HOME`` when that variable holds an
    absolute path; an unset, empty or relative value is ignored and the base
    falls back to ``.cache`` under the user's home directory. The result is
    always the ``websearch-kit`` subdirectory of that base. Nothing is created
    on disk here; only the path is computed.
    """
    candidate = os.environ.get("XDG_CACHE_HOME", "")
    # ``isabs("")`` is False, so unset and empty values share the fallback path.
    root = Path(candidate) if os.path.isabs(candidate) else Path.home() / ".cache"
    return root / "websearch-kit"
59
+
60
+
61
+ def make_cache(
62
+ config: WebSearchConfig,
63
+ clock: Callable[[], float] = time.monotonic,
64
+ ) -> Cache | None:
65
+ """Build the configured cache backend, or ``None`` when caching is disabled.
66
+
67
+ * ``"memory"`` -> :class:`MemoryTTLCache` (process-local, default).
68
+ * ``"sqlite"`` -> :class:`SqliteCache` rooted at ``config.cache_dir`` (or
69
+ :func:`default_cache_dir` when unset).
70
+ * ``"none"`` -> ``None`` (the engine skips caching entirely).
71
+
72
+ Any real backend is wrapped in :class:`CacheGuard` so backend faults become
73
+ logged, counted cache misses rather than run failures.
74
+
75
+ The ``clock`` is threaded into the memory backend (whose TTL uses a monotonic
76
+ source) so tests can drive expiry deterministically. The sqlite backend keeps
77
+ its own wall-clock default because its TTLs must survive process restarts.
78
+ """
79
+ backend = config.cache_backend
80
+ if backend == "none":
81
+ return None
82
+ if backend == "memory":
83
+ return CacheGuard(MemoryTTLCache(max_entries=512, clock=clock))
84
+ if backend == "sqlite":
85
+ directory = Path(config.cache_dir) if config.cache_dir else default_cache_dir()
86
+ return CacheGuard(SqliteCache(directory=directory))
87
+ # CacheBackend is a closed Literal; an unhandled member is a programming
88
+ # error, not a runtime input — surface it loudly rather than fail silent.
89
+ raise AssertionError(f"unhandled cache backend: {backend!r}") # pragma: no cover
@@ -0,0 +1,93 @@
1
+ """Deterministic cache-key builders for the three cacheable pipeline stages.
2
+
3
+ WHY a dedicated module: cache correctness hinges on *stable, collision-resistant*
4
+ keys derived from exactly the inputs that change a result — and nothing else. A
5
+ key built ad-hoc at each call site drifts (parameter order, optional-None
6
+ handling, forgetting to normalize a URL) and silently poisons the cache with
7
+ stale or cross-contaminated entries. Centralizing the builders makes the keyspace
8
+ auditable and the canonicalization rules a single source of truth.
9
+
10
+ Design choices:
11
+
12
+ * **sha256 hex of a canonical string.** Keys are fixed-length, opaque, and safe
13
+ to use as a sqlite ``TEXT PRIMARY KEY`` regardless of how long/odd the inputs
14
+ are. sha256 is not used here for any security property — only for a uniform,
15
+ low-collision digest — so the lint that flags weak hashes does not apply.
16
+ * **Explicit stage prefixes** (``search:``/``content:``/``expand:``) keep the
17
+ three keyspaces disjoint inside a shared backend, so a content URL can never
18
+ alias a search query that happens to hash the same canonical bytes.
19
+ * **``content_key`` applies :func:`sanitize_url` itself.** Stripping tracking
20
+ parameters (utm_*, gclid, ...) before hashing is what makes two links to the
21
+ same page share a cache entry. Doing it *inside* the key builder means a caller
22
+ physically cannot forget to normalize first — the dedup win is structural, not
23
+ a convention someone must remember.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import hashlib
29
+
30
+ from ..security.sanitize import sanitize_url
31
+
32
+ __all__ = [
33
+ "content_key",
34
+ "expansion_key",
35
+ "search_key",
36
+ ]
37
+
38
+ #: Field separator for the canonical string. A control character (unit
39
+ #: separator, 0x1f) cannot appear in a normal query/URL/prompt, so it cannot be
40
+ #: forged by input to merge two distinct tuples into one canonical string.
41
+ _SEP = "\x1f"
42
+
43
+
44
def _digest(prefix: str, *parts: str) -> str:
    """Build a cache key: ``prefix`` followed by a sha256 hex digest.

    The ``parts`` are joined with ``_SEP``, encoded as UTF-8 and hashed; the
    prefix itself is prepended verbatim and is not part of the hashed bytes.
    """
    payload = _SEP.join(parts).encode("utf-8")
    return prefix + hashlib.sha256(payload).hexdigest()
49
+
50
+
51
def search_key(
    provider: str,
    query: str,
    count: int,
    lang: str | None,
    time_range: str | None,
) -> str:
    """Build the cache key for a single provider search request.

    All five arguments feed the key, in this fixed order: provider, query,
    ``count`` (as its decimal string), language filter, time-range filter.
    A filter that is ``None`` or empty contributes the empty string, so an
    unset filter always hashes the same way. The key carries the ``search:``
    prefix.
    """
    # Falsy filters (None or "") collapse to "" before they reach the digest.
    filters = (lang or "", time_range or "")
    return _digest("search:", provider, query, str(count), *filters)
74
+
75
+
76
def content_key(sanitized_url: str) -> str:
    """Build the cache key for one page, derived from its normalized URL.

    Despite the parameter name, the argument does not have to be normalized
    already: it is always passed through :func:`sanitize_url` before hashing,
    so the normalization step cannot be skipped by a caller. The key carries
    the ``content:`` prefix.
    """
    normalized = sanitize_url(sanitized_url)
    return _digest("content:", normalized)
85
+
86
+
87
def expansion_key(prompt_text: str) -> str:
    """Build the cache key for a query-expansion result.

    The complete prompt string is the only input to the digest, so any
    change to the prompt text yields a different key. The key carries the
    ``expand:`` prefix.
    """
    stage_prefix = "expand:"
    return _digest(stage_prefix, prompt_text)
@@ -0,0 +1,121 @@
1
+ """In-process TTL cache with LRU eviction — the zero-config default backend.
2
+
3
+ WHY in-memory by default: the common deployment is a single long-lived process
4
+ (an OWUI worker, an MCP server) where a process-local cache eliminates duplicate
5
+ provider calls and page fetches within a session at zero operational cost — no
6
+ file, no extra dependency. It is intentionally *not* shared across processes;
7
+ callers needing cross-process persistence opt into :class:`SqliteCache`.
8
+
9
+ Concurrency model: every mutation (and the lazy-expiry bookkeeping on ``get``)
10
+ runs under a single :class:`asyncio.Lock`. The cache is only safe within one
11
+ event loop — which is the contract, since the pipeline is async and single-loop.
12
+ The lock makes the read-modify-write on the backing :class:`OrderedDict` atomic
13
+ with respect to other awaiting tasks, so concurrent ``get``/``set`` from
14
+ ``asyncio.gather`` can never observe or produce a half-updated structure.
15
+
16
+ Eviction policy:
17
+
18
+ * **TTL** — each entry carries an absolute monotonic expiry timestamp. Expired
19
+ entries are dropped *lazily* on ``get`` (the cheap, always-correct path) and
20
+ *opportunistically* swept on ``set`` only when the cache is over capacity
21
+ (amortizing the sweep cost instead of scanning on every write).
22
+ * **LRU** — recency is tracked by position in the ``OrderedDict``: a hit or a
23
+ write moves the key to the most-recently-used end. When the map exceeds
24
+ ``max_entries`` after a write, the least-recently-used keys are popped from the
25
+ front until back within bounds.
26
+
27
+ The clock is injectable (default :func:`time.monotonic`) so tests drive TTL
28
+ expiry deterministically without sleeping. Monotonic time is used because TTL is
29
+ a duration; it must be immune to wall-clock jumps (NTP steps, DST).
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import asyncio
35
+ import time
36
+ from collections import OrderedDict
37
+ from collections.abc import Callable
38
+ from dataclasses import dataclass
39
+ from typing import Any
40
+
41
+ __all__ = ["MemoryTTLCache"]
42
+
43
+
44
+ @dataclass(slots=True)
45
+ class _Entry:
46
+ """A cached value plus its absolute monotonic expiry timestamp."""
47
+
48
+ value: Any
49
+ expires_at: float
50
+
51
+
52
+ class MemoryTTLCache:
53
+ """Async, LRU-bounded, per-entry-TTL cache implementing :class:`protocols.Cache`.
54
+
55
+ Args:
56
+ max_entries: Hard cap on retained entries; LRU keys are evicted past it.
57
+ clock: Monotonic time source returning seconds; injected for tests.
58
+ """
59
+
60
+ def __init__(
61
+ self,
62
+ max_entries: int = 512,
63
+ clock: Callable[[], float] = time.monotonic,
64
+ ) -> None:
65
+ self._max_entries = max_entries
66
+ self._clock = clock
67
+ # Insertion/access order IS the LRU order: front = least recently used.
68
+ self._store: OrderedDict[str, _Entry] = OrderedDict()
69
+ self._lock = asyncio.Lock()
70
+
71
+ async def get(self, key: str) -> Any | None:
72
+ """Return the live value for ``key``, or ``None`` if absent or expired.
73
+
74
+ A live hit is promoted to most-recently-used. An expired entry is dropped
75
+ lazily here (and reported as a miss) so stale data never escapes, even if
76
+ no write has triggered an opportunistic sweep.
77
+ """
78
+ async with self._lock:
79
+ entry = self._store.get(key)
80
+ if entry is None:
81
+ return None
82
+ if self._is_expired(entry):
83
+ # Lazy expiry: drop the stale row and report a miss. Not
84
+ # fail-silent — an expired entry is definitionally not present.
85
+ del self._store[key]
86
+ return None
87
+ self._store.move_to_end(key)
88
+ return entry.value
89
+
90
+ async def set(self, key: str, value: Any, ttl: float) -> None:
91
+ """Insert/replace ``key`` with ``value``, expiring ``ttl`` seconds hence.
92
+
93
+ The key becomes most-recently-used. When the store is over capacity after
94
+ the write, expired entries are swept first (reclaiming space without
95
+ discarding live data), then the least-recently-used keys are evicted until
96
+ back within ``max_entries``.
97
+ """
98
+ async with self._lock:
99
+ expires_at = self._clock() + ttl
100
+ if key in self._store:
101
+ self._store.move_to_end(key)
102
+ self._store[key] = _Entry(value=value, expires_at=expires_at)
103
+ if len(self._store) > self._max_entries:
104
+ self._sweep_expired()
105
+ self._evict_lru()
106
+
107
+ def _is_expired(self, entry: _Entry) -> bool:
108
+ return entry.expires_at <= self._clock()
109
+
110
+ def _sweep_expired(self) -> None:
111
+ """Drop every currently-expired entry. Caller must hold the lock."""
112
+ now = self._clock()
113
+ stale = [key for key, entry in self._store.items() if entry.expires_at <= now]
114
+ for key in stale:
115
+ del self._store[key]
116
+
117
+ def _evict_lru(self) -> None:
118
+ """Pop least-recently-used keys until within capacity. Caller holds lock."""
119
+ while len(self._store) > self._max_entries:
120
+ # popitem(last=False) removes the front == least-recently-used entry.
121
+ self._store.popitem(last=False)