PyPI - threadkeeper - Versions diffs - 0.6.2__tar.gz → 0.7.0__tar.gz - Mend

threadkeeper 0.6.2 (tar.gz) → 0.7.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

{threadkeeper-0.6.2/threadkeeper.egg-info → threadkeeper-0.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: threadkeeper
-Version: 0.6.2
+Version: 0.7.0
 Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
 Author: thread-keeper contributors
 License: MIT
@@ -24,12 +24,17 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: mcp>=1.0.0
 Provides-Extra: semantic
-Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic"
+Requires-Dist: fastembed>=0.3; extra == "semantic"
 Requires-Dist: numpy>=1.24.0; extra == "semantic"
 Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic"
+Provides-Extra: semantic-st
+Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic-st"
+Requires-Dist: numpy>=1.24.0; extra == "semantic-st"
+Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic-st"
 Provides-Extra: dev
 Requires-Dist: pytest>=8.0; extra == "dev"
 Requires-Dist: pytest-cov>=5.0; extra == "dev"
+Requires-Dist: pytest-forked>=1.6; extra == "dev"
 Dynamic: license-file
 # thread-keeper
@@ -189,7 +194,7 @@ autonomous learning daemons cannot recursively start inside review forks.
 A daemon measures combined child RSS every 10 s; admission control
 refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
 (3 GB default). Slim children that need semantic search delegate to the
-parent via `search_via_parent` — no per-child copy of sentence-transformers.
+parent via `search_via_parent` — no per-child copy of the embedding model.
 ### Learning loops
@@ -435,7 +440,9 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
 | `THREADKEEPER_MEMORY_GUARD_RETIRE_LIVE` | "" (off) | allow retiring parent-alive MCP servers; off protects live clients |
 | `THREADKEEPER_MEMORY_GUARD_NOTIFY` | "1" | send macOS desktop notification when possible |
 | `THREADKEEPER_INGEST_INTERVAL_S` | 3 | transcript ingest tick (s) |
-| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable sentence-transformers |
+| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable the embedding model (FTS5 + delegate only) |
+| `THREADKEEPER_EMBED_BACKEND` | `onnx` | embedding runtime: `onnx` (fastembed, no PyTorch) or `sentence-transformers` (legacy fallback) |
+| `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
 | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
 | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
@@ -525,6 +532,34 @@ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
 ---
+## Embeddings
+Semantic search runs `paraphrase-multilingual-MiniLM-L12-v2` (384-dim,
+RU+EN+50 langs). The default backend is **fastembed / ONNX Runtime** — no
+PyTorch. A model-loaded process sits at ~700 MB physical footprint
+(~850 MB RSS), down from ~1.8 GB on the PyTorch backend.
+A **sentence-transformers** (PyTorch) backend is kept as an opt-in fallback.
+It is heavier (~1.8 GB RSS) and produces vectors that are *not numerically
+identical* to the ONNX backend's, so switching backends warrants a recompute:
+```bash
+# Install the fallback runtime and switch to it:
+pip install -e '.[semantic-st]'
+export THREADKEEPER_EMBED_BACKEND=sentence-transformers
+# After any backend switch, homogenize the stored corpus so queries and
+# stored vectors live in the same space:
+tk-migrate-embeddings --all          # or --notes-only / --dialog-only
+tk-migrate-embeddings --all --dry-run   # report stale counts only, write nothing
+```
+The migration is batched, resumable, and idempotent (a second run finds
+nothing stale). Both backends emit 384-dim vectors, so the `vec0` schema is
+unchanged.
+---
 ## Verifying ingest across CLIs
 ```bash

{threadkeeper-0.6.2 → threadkeeper-0.7.0}/README.md RENAMED Viewed

@@ -155,7 +155,7 @@ autonomous learning daemons cannot recursively start inside review forks.
 A daemon measures combined child RSS every 10 s; admission control
 refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
 (3 GB default). Slim children that need semantic search delegate to the
-parent via `search_via_parent` — no per-child copy of sentence-transformers.
+parent via `search_via_parent` — no per-child copy of the embedding model.
 ### Learning loops
@@ -401,7 +401,9 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
 | `THREADKEEPER_MEMORY_GUARD_RETIRE_LIVE` | "" (off) | allow retiring parent-alive MCP servers; off protects live clients |
 | `THREADKEEPER_MEMORY_GUARD_NOTIFY` | "1" | send macOS desktop notification when possible |
 | `THREADKEEPER_INGEST_INTERVAL_S` | 3 | transcript ingest tick (s) |
-| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable sentence-transformers |
+| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable the embedding model (FTS5 + delegate only) |
+| `THREADKEEPER_EMBED_BACKEND` | `onnx` | embedding runtime: `onnx` (fastembed, no PyTorch) or `sentence-transformers` (legacy fallback) |
+| `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
 | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
 | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
@@ -491,6 +493,34 @@ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
 ---
+## Embeddings
+Semantic search runs `paraphrase-multilingual-MiniLM-L12-v2` (384-dim,
+RU+EN+50 langs). The default backend is **fastembed / ONNX Runtime** — no
+PyTorch. A model-loaded process sits at ~700 MB physical footprint
+(~850 MB RSS), down from ~1.8 GB on the PyTorch backend.
+A **sentence-transformers** (PyTorch) backend is kept as an opt-in fallback.
+It is heavier (~1.8 GB RSS) and produces vectors that are *not numerically
+identical* to the ONNX backend's, so switching backends warrants a recompute:
+```bash
+# Install the fallback runtime and switch to it:
+pip install -e '.[semantic-st]'
+export THREADKEEPER_EMBED_BACKEND=sentence-transformers
+# After any backend switch, homogenize the stored corpus so queries and
+# stored vectors live in the same space:
+tk-migrate-embeddings --all          # or --notes-only / --dialog-only
+tk-migrate-embeddings --all --dry-run   # report stale counts only, write nothing
+```
+The migration is batched, resumable, and idempotent (a second run finds
+nothing stale). Both backends emit 384-dim vectors, so the `vec0` schema is
+unchanged.
+---
 ## Verifying ingest across CLIs
 ```bash

{threadkeeper-0.6.2 → threadkeeper-0.7.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "threadkeeper"
-version = "0.6.2"
+version = "0.7.0"
 description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
 requires-python = ">=3.11"
 authors = [{ name = "thread-keeper contributors" }]
@@ -32,15 +32,27 @@ dependencies = [
 [project.optional-dependencies]
 # Semantic cross-language search + sub-linear vector index. Recommended
 # for any real use — without it, dialog_search falls back to FTS5 only.
+# Default backend is fastembed/ONNX Runtime: no PyTorch, ~700MB footprint.
 semantic = [
+    "fastembed>=0.3",
+    "numpy>=1.24.0",
+    "sqlite-vec>=0.1.9",
+]
+# Legacy PyTorch backend, kept as an opt-in fallback. Install this AND set
+# THREADKEEPER_EMBED_BACKEND=sentence-transformers to use it. ~1.8GB RSS.
+semantic-st = [
     "sentence-transformers>=2.2.0",
     "numpy>=1.24.0",
     "sqlite-vec>=0.1.9",
 ]
-# Test runner + coverage.
+# Test runner + coverage. pytest-forked isolates each test in its own
+# process: the per-test package re-import (tests/conftest.py) accumulates
+# native ONNX/tokenizer thread pools that can deadlock sqlite finalize in a
+# single long-lived process, so CI runs `pytest --forked`.
 dev = [
     "pytest>=8.0",
     "pytest-cov>=5.0",
+    "pytest-forked>=1.6",
 ]
 [project.urls]
@@ -54,6 +66,9 @@ Changelog     = "https://github.com/po4erk91/thread-keeper/releases"
 # After `pip install threadkeeper`, the user gets `thread-keeper-setup`
 # directly on PATH. Equivalent to `python -m threadkeeper._setup`.
 thread-keeper-setup = "threadkeeper._setup:main"
+# Recompute stored embeddings with the active backend (e.g. after switching to
+# the ONNX default). Equivalent to `python -m threadkeeper.migrate_embeddings`.
+tk-migrate-embeddings = "threadkeeper.migrate_embeddings:main"
 [tool.setuptools.packages.find]
 include = ["threadkeeper*"]

threadkeeper-0.7.0/tests/test_onnx_embeddings.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""ONNX embedding backend + tk-migrate-embeddings.
+Verifies that:
+- the active backend encodes to L2-normalized 384-dim float32 vectors
+- embed_tag stamps the active backend for a real blob, None otherwise
+- freshly inserted notes carry the embed_backend tag
+- the migration recomputes stale (NULL-tagged) rows, tags them, and is
+  idempotent + dry-run-safe
+Skips entirely when no embedding backend is installed.
+"""
+from __future__ import annotations
+import time
+import pytest
+pytestmark = pytest.mark.slow  # model warmup on first encode
+def _tool(pkg, name):
+    return pkg["mcp"]._tool_manager._tools[name].fn
+@pytest.fixture()
+def sem_pkg(fresh_mp):
+    """Fresh package against a clean tmp DB; skip if semantic search is off."""
+    if not fresh_mp["config"].SEMANTIC_AVAILABLE:
+        pytest.skip("no embedding backend installed in this environment")
+    return fresh_mp
+def _seed_legacy_notes(conn, n: int):
+    """Insert n notes with a real embedding blob but a NULL backend tag,
+    simulating rows written before the ONNX migration."""
+    from threadkeeper import embeddings as emb
+    for i in range(n):
+        blob = emb._embed(f"legacy seeded note {i} about webhooks and retries")
+        conn.execute(
+            "INSERT INTO notes (content, kind, created_at, embedding, embed_backend) "
+            "VALUES (?,?,?,?,NULL)",
+            (f"legacy seeded note {i}", "insight", int(time.time()), blob),
+        )
+    conn.commit()
+# ── encode primitives ────────────────────────────────────────────────
+def test_encode_is_normalized_384_float32(sem_pkg):
+    import numpy as np
+    from threadkeeper import embeddings as emb
+    arr = emb._encode(["привет мир", "hello world"])
+    assert arr is not None
+    assert arr.shape == (2, 384)
+    assert arr.dtype == np.dtype("float32")
+    assert np.allclose(np.linalg.norm(arr, axis=1), 1.0, atol=1e-3)
+def test_encode_is_cross_lingual(sem_pkg):
+    """A RU/EN translation pair must score higher than an unrelated phrase."""
+    from threadkeeper import embeddings as emb
+    v = emb._encode(["кошка", "cat", "quarterly financial report"])
+    assert float(v[0] @ v[1]) > float(v[0] @ v[2])
+def test_embed_tag(sem_pkg):
+    from threadkeeper import embeddings as emb
+    active = sem_pkg["config"].EMBED_BACKEND
+    assert emb.embed_tag(b"\x00\x01") == active
+    assert emb.embed_tag(None) is None
+# ── write-path tagging ───────────────────────────────────────────────
+def test_new_note_carries_backend_tag(sem_pkg):
+    tid = _tool(sem_pkg, "open_thread")(question="backend tag test")
+    _tool(sem_pkg, "note")(thread_id=tid,
+                           content="tagged note about idempotency keys",
+                           kind="insight")
+    conn = sem_pkg["db"].get_db()
+    active = sem_pkg["config"].EMBED_BACKEND
+    row = conn.execute(
+        "SELECT embedding, embed_backend FROM notes "
+        "WHERE thread_id=? ORDER BY id DESC LIMIT 1",
+        (tid,),
+    ).fetchone()
+    assert row["embedding"] is not None
+    assert row["embed_backend"] == active
+# ── migration ────────────────────────────────────────────────────────
+def test_migration_recomputes_tags_and_is_idempotent(sem_pkg):
+    from threadkeeper import migrate_embeddings as mig
+    active = sem_pkg["config"].EMBED_BACKEND
+    conn = sem_pkg["db"].get_db()
+    _seed_legacy_notes(conn, 3)
+    assert mig._count_stale(conn, "notes", active) == 3
+    rc = mig.run(do_notes=True, do_dialog=False, batch=2,
+                 dry_run=False, log=lambda _m: None)
+    assert rc == 0
+    assert mig._count_stale(conn, "notes", active) == 0
+    tagged = conn.execute(
+        "SELECT COUNT(*) FROM notes WHERE embed_backend=?", (active,)
+    ).fetchone()[0]
+    assert tagged >= 3
+    # idempotent: a second pass finds nothing stale and changes nothing.
+    rc2 = mig.run(do_notes=True, do_dialog=False, batch=2,
+                  dry_run=False, log=lambda _m: None)
+    assert rc2 == 0
+    assert mig._count_stale(conn, "notes", active) == 0
+def test_migration_dry_run_writes_nothing(sem_pkg):
+    from threadkeeper import migrate_embeddings as mig
+    active = sem_pkg["config"].EMBED_BACKEND
+    conn = sem_pkg["db"].get_db()
+    _seed_legacy_notes(conn, 2)
+    assert mig._count_stale(conn, "notes", active) == 2
+    mig.run(do_notes=True, do_dialog=False, batch=10,
+            dry_run=True, log=lambda _m: None)
+    # still stale — dry run must not touch the rows
+    assert mig._count_stale(conn, "notes", active) == 2
+def test_migration_requires_a_scope_flag(sem_pkg):
+    from threadkeeper import migrate_embeddings as mig
+    with pytest.raises(SystemExit):
+        mig.main([])  # argparse error → SystemExit(2)

{threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/config.py RENAMED Viewed

@@ -2,6 +2,7 @@
 Imported wherever a constant or config is needed; cheap to import."""
 from __future__ import annotations
+import importlib.util
 import os
 from pathlib import Path
 from typing import Optional
@@ -15,6 +16,23 @@ EMBED_MODEL_NAME: str = os.environ.get(
     "paraphrase-multilingual-MiniLM-L12-v2",  # 118 MB, RU+EN cross-lingual
 )
+# Embedding runtime backend. 'onnx' (default) runs the model through fastembed /
+# ONNX Runtime — no PyTorch, ~700MB footprint (vs ~1.8GB). 'sentence-transformers' is
+# the legacy PyTorch path, kept as an opt-in fallback (install `.[semantic-st]`
+# and set THREADKEEPER_EMBED_BACKEND=sentence-transformers). Both produce
+# 384-dim vectors, but the values are NOT numerically identical across
+# backends, so switching warrants a `tk-migrate-embeddings --all` recompute.
+EMBED_BACKEND: str = os.environ.get(
+    "THREADKEEPER_EMBED_BACKEND", "onnx"
+).strip().lower()
+# fastembed addresses the model under its sentence-transformers org prefix;
+# SentenceTransformer accepts the bare name. Normalize for the ONNX backend.
+FASTEMBED_MODEL_ID: str = (
+    EMBED_MODEL_NAME if "/" in EMBED_MODEL_NAME
+    else f"sentence-transformers/{EMBED_MODEL_NAME}"
+)
 DB_PATH.parent.mkdir(parents=True, exist_ok=True)
 # One-shot migration from the historical name `memory_partner`. If the new
@@ -52,15 +70,26 @@ NO_EMBEDDINGS: bool = os.environ.get(
 # Optional semantic search. If the selected embedding backend is not installed
 # OR the no-embeddings opt-out is set, fall back to FTS5 keyword matching +
 # delegate. Brief still works either way.
+def _installed(*mods: str) -> bool:
+    """True if every module is importable, checked WITHOUT importing it.
+    `find_spec` locates the module via the import machinery but never executes
+    it — so probing availability here doesn't pull PyTorch / ONNX Runtime /
+    tokenizers (and their thread pools) into every process that imports config.
+    The heavy import stays lazy in `embeddings._get_model()`.
+    """
+    try:
+        return all(importlib.util.find_spec(m) is not None for m in mods)
+    except (ImportError, ValueError):
+        return False
 if NO_EMBEDDINGS:
     SEMANTIC_AVAILABLE: bool = False
-else:
-    try:
-        from sentence_transformers import SentenceTransformer  # type: ignore  # noqa: F401
-        import numpy as np  # type: ignore  # noqa: F401
-        SEMANTIC_AVAILABLE = True
-    except Exception:
-        SEMANTIC_AVAILABLE = False
+elif EMBED_BACKEND == "sentence-transformers":
+    SEMANTIC_AVAILABLE = _installed("sentence_transformers", "numpy")
+else:  # 'onnx' (default)
+    SEMANTIC_AVAILABLE = _installed("fastembed", "numpy")
 # Client label used for `presence`/`sessions` rows.
 CLIENT_LABEL: str = os.environ.get("THREADKEEPER_CLIENT", "claude")

{threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/db.py RENAMED Viewed

@@ -72,7 +72,8 @@ CREATE TABLE IF NOT EXISTS notes (
     kind        TEXT NOT NULL,
     created_at  INTEGER NOT NULL,
     session_id  TEXT,
-    embedding   BLOB
+    embedding   BLOB,
+    embed_backend TEXT           -- backend that produced `embedding`; NULL = legacy
 );
 CREATE TABLE IF NOT EXISTS verbatim (
@@ -143,7 +144,8 @@ CREATE TABLE IF NOT EXISTS dialog_messages (
     content      TEXT NOT NULL,                -- concatenated text blocks
     model        TEXT,
     created_at   INTEGER NOT NULL,
-    embedding    BLOB
+    embedding    BLOB,
+    embed_backend TEXT           -- backend that produced `embedding`; NULL = legacy
 );
 CREATE TABLE IF NOT EXISTS ingest_state (
@@ -500,6 +502,11 @@ def get_db() -> sqlite3.Connection:
         "ALTER TABLE skill_usage ADD COLUMN wrong_count "
         "INTEGER NOT NULL DEFAULT 0",
         "ALTER TABLE skill_usage ADD COLUMN last_wrong_at INTEGER",
+        # Embedding backend tag. NULL = legacy (sentence-transformers, pre-ONNX
+        # migration). New/recomputed rows carry 'onnx' or 'sentence-transformers'
+        # so `tk-migrate-embeddings` can find stale vectors and skip done ones.
+        "ALTER TABLE notes ADD COLUMN embed_backend TEXT",
+        "ALTER TABLE dialog_messages ADD COLUMN embed_backend TEXT",
     ):
         try:
             conn.execute(ddl)

{threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/embeddings.py RENAMED Viewed

@@ -17,7 +17,12 @@ import sqlite3
 import threading
 from typing import Optional
-from .config import SEMANTIC_AVAILABLE, EMBED_MODEL_NAME
+from .config import (
+    SEMANTIC_AVAILABLE,
+    EMBED_MODEL_NAME,
+    EMBED_BACKEND,
+    FASTEMBED_MODEL_ID,
+)
 from . import db as _db
@@ -29,13 +34,22 @@ _model = None
 _model_lock = threading.RLock()
 def _get_model():
+    """Lazily load and cache the embedding model for the active backend.
+    'onnx' (default) → fastembed.TextEmbedding (ONNX Runtime, no PyTorch).
+    'sentence-transformers' → the legacy PyTorch path (opt-in fallback).
+    """
     global _model
     if not SEMANTIC_AVAILABLE:
         return None
     with _model_lock:
         if _model is None:
-            from sentence_transformers import SentenceTransformer  # type: ignore
-            _model = SentenceTransformer(EMBED_MODEL_NAME)
+            if EMBED_BACKEND == "sentence-transformers":
+                from sentence_transformers import SentenceTransformer  # type: ignore
+                _model = SentenceTransformer(EMBED_MODEL_NAME)
+            else:  # 'onnx' (default)
+                from fastembed import TextEmbedding  # type: ignore
+                _model = TextEmbedding(model_name=FASTEMBED_MODEL_ID)
         return _model
@@ -66,23 +80,55 @@ def unload_model() -> bool:
     del model
     return True
-def _embed(text: str) -> Optional[bytes]:
+def _encode(texts: list[str]):
+    """Backend-agnostic batch encode → L2-normalized float32 array of shape
+    (len(texts), EMBED_DIM), or None when semantic search is unavailable.
+    Both backends are normalized to unit length here so the dot product used
+    by the vec0 and legacy paths equals cosine similarity, regardless of
+    whether the backend already normalizes.
+    """
     with _model_lock:
         m = _get_model()
         if m is None:
             return None
-        v = m.encode([text], normalize_embeddings=True)[0].astype("float32")
-    return v.tobytes()
+        import numpy as np  # type: ignore
+        if EMBED_BACKEND == "sentence-transformers":
+            arr = np.asarray(m.encode(list(texts)), dtype="float32")
+        else:  # fastembed generator → stack
+            arr = np.asarray(list(m.embed(list(texts))), dtype="float32")
+    norms = np.linalg.norm(arr, axis=1, keepdims=True)
+    norms[norms == 0] = 1.0
+    return (arr / norms).astype("float32")
+def encode_many(texts: list[str]):
+    """Public batch encoder for the migration command. Returns the same
+    normalized float32 array as `_encode`, or None when unavailable."""
+    return _encode(texts)
+def embed_tag(blob: Optional[bytes]) -> Optional[str]:
+    """Backend label to store in the `embed_backend` column alongside a freshly
+    written embedding blob. None when no embedding was produced, so legacy /
+    NULL-vector rows stay untagged."""
+    return EMBED_BACKEND if blob is not None else None
+def _embed(text: str) -> Optional[bytes]:
+    arr = _encode([text])
+    if arr is None:
+        return None
+    return arr[0].astype("float32").tobytes()
 def _cosine_search(conn: sqlite3.Connection, query: str, k: int) -> list[dict]:
     """Top-k cosine over notes. Uses vec0 ANN when available."""
-    with _model_lock:
-        m = _get_model()
-        if m is None:
-            return []
-        import numpy as np  # type: ignore
-        qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
+    import numpy as np  # type: ignore
+    qa = _encode([query])
+    if qa is None:
+        return []
+    qv = qa[0]
     if _vec_on():
         try:
             return _vec0_notes_search(conn, qv.tobytes(), k)
@@ -131,12 +177,11 @@ def _vec0_notes_search(conn: sqlite3.Connection, qv_blob: bytes,
 def _dialog_cosine_search(conn, query: str, k: int) -> list[dict]:
     """Top-k cosine over dialog_messages. Uses vec0 ANN when available."""
-    with _model_lock:
-        m = _get_model()
-        if m is None:
-            return []
-        import numpy as np  # type: ignore
-        qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
+    import numpy as np  # type: ignore
+    qa = _encode([query])
+    if qa is None:
+        return []
+    qv = qa[0]
     if _vec_on():
         try:
             return _vec0_dialog_search(conn, qv.tobytes(), k)

{threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/ingest.py RENAMED Viewed

@@ -18,7 +18,7 @@ from .config import (
     SEMANTIC_AVAILABLE,
 )
 from .db import get_db
-from .embeddings import _embed
+from .embeddings import _embed, embed_tag
 _ingest_thread: Optional[threading.Thread] = None
 _ingest_lock = threading.Lock()
@@ -215,11 +215,11 @@ def _ingest_file(conn: sqlite3.Connection, fp: Path, max_msgs: int,
             emb = _embed(text[:2000]) if SEMANTIC_AVAILABLE else None
             conn.execute(
                 "INSERT INTO dialog_messages (uuid, source, project, session_id, "
-                "role, content, model, created_at, embedding) "
-                "VALUES (?,?,?,?,?,?,?,?,?)",
+                "role, content, model, created_at, embedding, embed_backend) "
+                "VALUES (?,?,?,?,?,?,?,?,?,?)",
                 (nm.uuid, adapter.name, adapter.project_label(fp),
                  nm.session_id, nm.role, text,
-                 nm.model, nm.created_at, emb)
+                 nm.model, nm.created_at, emb, embed_tag(emb))
             )
             try:
                 conn.execute(
@@ -381,7 +381,7 @@ def _backfill_note_embeddings(conn: sqlite3.Connection, max_n: int = 20) -> int:
         return 0
     if not rows:
         return 0
-    from .embeddings import _embed, _vec_upsert_note
+    from .embeddings import _embed, _vec_upsert_note, embed_tag
     updated = 0
     for r in rows:
         try:
@@ -392,8 +392,8 @@ def _backfill_note_embeddings(conn: sqlite3.Connection, max_n: int = 20) -> int:
             continue
         try:
             conn.execute(
-                "UPDATE notes SET embedding=? WHERE id=?",
-                (emb, r["id"]),
+                "UPDATE notes SET embedding=?, embed_backend=? WHERE id=?",
+                (emb, embed_tag(emb), r["id"]),
             )
             _vec_upsert_note(conn, r["id"], emb)
             updated += 1

threadkeeper 0.6.2__tar.gz → 0.7.0__tar.gz

threadkeeper 0.6.2 (tar.gz) → 0.7.0 (tar.gz)