threadkeeper 0.6.2__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {threadkeeper-0.6.2/threadkeeper.egg-info → threadkeeper-0.7.0}/PKG-INFO +39 -4
  2. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/README.md +32 -2
  3. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/pyproject.toml +17 -2
  4. threadkeeper-0.7.0/tests/test_onnx_embeddings.py +133 -0
  5. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/config.py +36 -7
  6. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/db.py +9 -2
  7. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/embeddings.py +63 -18
  8. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/ingest.py +7 -7
  9. threadkeeper-0.7.0/threadkeeper/migrate_embeddings.py +146 -0
  10. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/consolidate.py +4 -4
  11. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/extract.py +3 -3
  12. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/pickup.py +4 -3
  13. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/session.py +4 -4
  14. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/threads.py +6 -6
  15. {threadkeeper-0.6.2 → threadkeeper-0.7.0/threadkeeper.egg-info}/PKG-INFO +39 -4
  16. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/SOURCES.txt +2 -0
  17. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/entry_points.txt +1 -0
  18. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/requires.txt +6 -0
  19. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/LICENSE +0 -0
  20. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/setup.cfg +0 -0
  21. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_adapters.py +0 -0
  22. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_brief_sections.py +0 -0
  23. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_candidate_reviewer.py +0 -0
  24. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_core_memory.py +0 -0
  25. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_curator.py +0 -0
  26. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_delegated_search.py +0 -0
  27. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_dialectic.py +0 -0
  28. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_dialectic_tier.py +0 -0
  29. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_error_paths.py +0 -0
  30. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_extract_daemon.py +0 -0
  31. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_i18n_multilang.py +0 -0
  32. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_identity.py +0 -0
  33. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_lessons.py +0 -0
  34. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_memory_guard.py +0 -0
  35. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_missed_spawns.py +0 -0
  36. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_nudges.py +0 -0
  37. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_process_health.py +0 -0
  38. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_shadow_review.py +0 -0
  39. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_hint.py +0 -0
  40. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_tier.py +0 -0
  41. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_use_parser.py +0 -0
  42. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_watcher.py +0 -0
  43. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skills.py +0 -0
  44. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_budget.py +0 -0
  45. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_config.py +0 -0
  46. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_hint.py +0 -0
  47. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_slim.py +0 -0
  48. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_threads.py +0 -0
  49. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_tools_smoke.py +0 -0
  50. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_validate_threads.py +0 -0
  51. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_vec_search.py +0 -0
  52. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/__init__.py +0 -0
  53. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/_mcp.py +0 -0
  54. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/_setup.py +0 -0
  55. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/__init__.py +0 -0
  56. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/_hook_helpers.py +0 -0
  57. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/base.py +0 -0
  58. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/claude_code.py +0 -0
  59. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/claude_desktop.py +0 -0
  60. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/codex.py +0 -0
  61. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/copilot.py +0 -0
  62. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/gemini.py +0 -0
  63. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/vscode.py +0 -0
  64. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/brief.py +0 -0
  65. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/candidate_reviewer.py +0 -0
  66. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/curator.py +0 -0
  67. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/extract_daemon.py +0 -0
  68. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/helpers.py +0 -0
  69. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/i18n.py +0 -0
  70. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/identity.py +0 -0
  71. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/lessons.py +0 -0
  72. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/memory_guard.py +0 -0
  73. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/nudges.py +0 -0
  74. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/process_health.py +0 -0
  75. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/review_prompts.py +0 -0
  76. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/search_proxy.py +0 -0
  77. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/server.py +0 -0
  78. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/shadow_review.py +0 -0
  79. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/skill_watcher.py +0 -0
  80. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/spawn_budget.py +0 -0
  81. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/spawn_config.py +0 -0
  82. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/__init__.py +0 -0
  83. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/candidate_reviewer.py +0 -0
  84. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/concepts.py +0 -0
  85. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/core_memory.py +0 -0
  86. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/correlation.py +0 -0
  87. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/curator.py +0 -0
  88. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/dialectic.py +0 -0
  89. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/dialog.py +0 -0
  90. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/distill.py +0 -0
  91. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/graph.py +0 -0
  92. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/invariants.py +0 -0
  93. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/lessons.py +0 -0
  94. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/memory_guard.py +0 -0
  95. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/missed_spawns.py +0 -0
  96. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/peers.py +0 -0
  97. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/probes.py +0 -0
  98. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/process_health.py +0 -0
  99. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/shadow_review.py +0 -0
  100. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/skills.py +0 -0
  101. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/spawn.py +0 -0
  102. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/style.py +0 -0
  103. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/validate.py +0 -0
  104. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/dependency_links.txt +0 -0
  105. {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: threadkeeper
3
- Version: 0.6.2
3
+ Version: 0.7.0
4
4
  Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
5
5
  Author: thread-keeper contributors
6
6
  License: MIT
@@ -24,12 +24,17 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: mcp>=1.0.0
26
26
  Provides-Extra: semantic
27
- Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic"
27
+ Requires-Dist: fastembed>=0.3; extra == "semantic"
28
28
  Requires-Dist: numpy>=1.24.0; extra == "semantic"
29
29
  Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic"
30
+ Provides-Extra: semantic-st
31
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic-st"
32
+ Requires-Dist: numpy>=1.24.0; extra == "semantic-st"
33
+ Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic-st"
30
34
  Provides-Extra: dev
31
35
  Requires-Dist: pytest>=8.0; extra == "dev"
32
36
  Requires-Dist: pytest-cov>=5.0; extra == "dev"
37
+ Requires-Dist: pytest-forked>=1.6; extra == "dev"
33
38
  Dynamic: license-file
34
39
 
35
40
  # thread-keeper
@@ -189,7 +194,7 @@ autonomous learning daemons cannot recursively start inside review forks.
189
194
  A daemon measures combined child RSS every 10 s; admission control
190
195
  refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
191
196
  (3 GB default). Slim children that need semantic search delegate to the
192
- parent via `search_via_parent` — no per-child copy of sentence-transformers.
197
+ parent via `search_via_parent` — no per-child copy of the embedding model.
193
198
 
194
199
  ### Learning loops
195
200
 
@@ -435,7 +440,9 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
435
440
  | `THREADKEEPER_MEMORY_GUARD_RETIRE_LIVE` | "" (off) | allow retiring parent-alive MCP servers; off protects live clients |
436
441
  | `THREADKEEPER_MEMORY_GUARD_NOTIFY` | "1" | send macOS desktop notification when possible |
437
442
  | `THREADKEEPER_INGEST_INTERVAL_S` | 3 | transcript ingest tick (s) |
438
- | `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable sentence-transformers |
443
+ | `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable the embedding model (FTS5 + delegate only) |
444
+ | `THREADKEEPER_EMBED_BACKEND` | `onnx` | embedding runtime: `onnx` (fastembed, no PyTorch) or `sentence-transformers` (legacy fallback) |
445
+ | `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
439
446
  | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
440
447
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
441
448
 
@@ -525,6 +532,34 @@ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
525
532
 
526
533
  ---
527
534
 
535
+ ## Embeddings
536
+
537
+ Semantic search runs `paraphrase-multilingual-MiniLM-L12-v2` (384-dim,
538
+ RU+EN+50 langs). The default backend is **fastembed / ONNX Runtime** — no
539
+ PyTorch. A model-loaded process sits at ~700 MB physical footprint
540
+ (~850 MB RSS), down from ~1.8 GB on the PyTorch backend.
541
+
542
+ A **sentence-transformers** (PyTorch) backend is kept as an opt-in fallback.
543
+ It is heavier (~1.8 GB RSS) and produces vectors that are *not numerically
544
+ identical* to the ONNX backend's, so switching backends warrants a recompute:
545
+
546
+ ```bash
547
+ # Install the fallback runtime and switch to it:
548
+ pip install -e '.[semantic-st]'
549
+ export THREADKEEPER_EMBED_BACKEND=sentence-transformers
550
+
551
+ # After any backend switch, homogenize the stored corpus so queries and
552
+ # stored vectors live in the same space:
553
+ tk-migrate-embeddings --all # or --notes-only / --dialog-only
554
+ tk-migrate-embeddings --dry-run # report stale counts only
555
+ ```
556
+
557
+ The migration is batched, resumable, and idempotent (a second run finds
558
+ nothing stale). Both backends emit 384-dim vectors, so the `vec0` schema is
559
+ unchanged.
560
+
561
+ ---
562
+
528
563
  ## Verifying ingest across CLIs
529
564
 
530
565
  ```bash
@@ -155,7 +155,7 @@ autonomous learning daemons cannot recursively start inside review forks.
155
155
  A daemon measures combined child RSS every 10 s; admission control
156
156
  refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
157
157
  (3 GB default). Slim children that need semantic search delegate to the
158
- parent via `search_via_parent` — no per-child copy of sentence-transformers.
158
+ parent via `search_via_parent` — no per-child copy of the embedding model.
159
159
 
160
160
  ### Learning loops
161
161
 
@@ -401,7 +401,9 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
401
401
  | `THREADKEEPER_MEMORY_GUARD_RETIRE_LIVE` | "" (off) | allow retiring parent-alive MCP servers; off protects live clients |
402
402
  | `THREADKEEPER_MEMORY_GUARD_NOTIFY` | "1" | send macOS desktop notification when possible |
403
403
  | `THREADKEEPER_INGEST_INTERVAL_S` | 3 | transcript ingest tick (s) |
404
- | `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable sentence-transformers |
404
+ | `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable the embedding model (FTS5 + delegate only) |
405
+ | `THREADKEEPER_EMBED_BACKEND` | `onnx` | embedding runtime: `onnx` (fastembed, no PyTorch) or `sentence-transformers` (legacy fallback) |
406
+ | `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
405
407
  | `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
406
408
  | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
407
409
 
@@ -491,6 +493,34 @@ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
491
493
 
492
494
  ---
493
495
 
496
+ ## Embeddings
497
+
498
+ Semantic search runs `paraphrase-multilingual-MiniLM-L12-v2` (384-dim,
499
+ RU+EN+50 langs). The default backend is **fastembed / ONNX Runtime** — no
500
+ PyTorch. A model-loaded process sits at ~700 MB physical footprint
501
+ (~850 MB RSS), down from ~1.8 GB on the PyTorch backend.
502
+
503
+ A **sentence-transformers** (PyTorch) backend is kept as an opt-in fallback.
504
+ It is heavier (~1.8 GB RSS) and produces vectors that are *not numerically
505
+ identical* to the ONNX backend's, so switching backends warrants a recompute:
506
+
507
+ ```bash
508
+ # Install the fallback runtime and switch to it:
509
+ pip install -e '.[semantic-st]'
510
+ export THREADKEEPER_EMBED_BACKEND=sentence-transformers
511
+
512
+ # After any backend switch, homogenize the stored corpus so queries and
513
+ # stored vectors live in the same space:
514
+ tk-migrate-embeddings --all # or --notes-only / --dialog-only
515
+ tk-migrate-embeddings --dry-run # report stale counts only
516
+ ```
517
+
518
+ The migration is batched, resumable, and idempotent (a second run finds
519
+ nothing stale). Both backends emit 384-dim vectors, so the `vec0` schema is
520
+ unchanged.
521
+
522
+ ---
523
+
494
524
  ## Verifying ingest across CLIs
495
525
 
496
526
  ```bash
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "threadkeeper"
7
- version = "0.6.2"
7
+ version = "0.7.0"
8
8
  description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
9
9
  requires-python = ">=3.11"
10
10
  authors = [{ name = "thread-keeper contributors" }]
@@ -32,15 +32,27 @@ dependencies = [
32
32
  [project.optional-dependencies]
33
33
  # Semantic cross-language search + sub-linear vector index. Recommended
34
34
  # for any real use — without it, dialog_search falls back to FTS5 only.
35
+ # Default backend is fastembed/ONNX Runtime: no PyTorch, ~700MB footprint.
35
36
  semantic = [
37
+ "fastembed>=0.3",
38
+ "numpy>=1.24.0",
39
+ "sqlite-vec>=0.1.9",
40
+ ]
41
+ # Legacy PyTorch backend, kept as an opt-in fallback. Install this AND set
42
+ # THREADKEEPER_EMBED_BACKEND=sentence-transformers to use it. ~1.8GB RSS.
43
+ semantic-st = [
36
44
  "sentence-transformers>=2.2.0",
37
45
  "numpy>=1.24.0",
38
46
  "sqlite-vec>=0.1.9",
39
47
  ]
40
- # Test runner + coverage.
48
+ # Test runner + coverage. pytest-forked isolates each test in its own
49
+ # process: the per-test package re-import (tests/conftest.py) accumulates
50
+ # native ONNX/tokenizer thread pools that can deadlock sqlite finalize in a
51
+ # single long-lived process, so CI runs `pytest --forked`.
41
52
  dev = [
42
53
  "pytest>=8.0",
43
54
  "pytest-cov>=5.0",
55
+ "pytest-forked>=1.6",
44
56
  ]
45
57
 
46
58
  [project.urls]
@@ -54,6 +66,9 @@ Changelog = "https://github.com/po4erk91/thread-keeper/releases"
54
66
  # After `pip install threadkeeper`, the user gets `thread-keeper-setup`
55
67
  # directly on PATH. Equivalent to `python -m threadkeeper._setup`.
56
68
  thread-keeper-setup = "threadkeeper._setup:main"
69
+ # Recompute stored embeddings with the active backend (e.g. after switching to
70
+ # the ONNX default). Equivalent to `python -m threadkeeper.migrate_embeddings`.
71
+ tk-migrate-embeddings = "threadkeeper.migrate_embeddings:main"
57
72
 
58
73
  [tool.setuptools.packages.find]
59
74
  include = ["threadkeeper*"]
@@ -0,0 +1,133 @@
1
+ """ONNX embedding backend + tk-migrate-embeddings.
2
+
3
+ Verifies that:
4
+ - the active backend encodes to L2-normalized 384-dim float32 vectors
5
+ - embed_tag stamps the active backend for a real blob, None otherwise
6
+ - freshly inserted notes carry the embed_backend tag
7
+ - the migration recomputes stale (NULL-tagged) rows, tags them, and is
8
+ idempotent + dry-run-safe
9
+
10
+ Skips entirely when no embedding backend is installed.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import time
15
+
16
+ import pytest
17
+
18
+ pytestmark = pytest.mark.slow # model warmup on first encode
19
+
20
+
21
+ def _tool(pkg, name):
22
+ return pkg["mcp"]._tool_manager._tools[name].fn
23
+
24
+
25
+ @pytest.fixture()
26
+ def sem_pkg(fresh_mp):
27
+ """Fresh package against a clean tmp DB; skip if semantic search is off."""
28
+ if not fresh_mp["config"].SEMANTIC_AVAILABLE:
29
+ pytest.skip("no embedding backend installed in this environment")
30
+ return fresh_mp
31
+
32
+
33
+ def _seed_legacy_notes(conn, n: int):
34
+ """Insert n notes with a real embedding blob but a NULL backend tag,
35
+ simulating rows written before the ONNX migration."""
36
+ from threadkeeper import embeddings as emb
37
+ for i in range(n):
38
+ blob = emb._embed(f"legacy seeded note {i} about webhooks and retries")
39
+ conn.execute(
40
+ "INSERT INTO notes (content, kind, created_at, embedding, embed_backend) "
41
+ "VALUES (?,?,?,?,NULL)",
42
+ (f"legacy seeded note {i}", "insight", int(time.time()), blob),
43
+ )
44
+ conn.commit()
45
+
46
+
47
+ # ── encode primitives ────────────────────────────────────────────────
48
+
49
+ def test_encode_is_normalized_384_float32(sem_pkg):
50
+ import numpy as np
51
+ from threadkeeper import embeddings as emb
52
+ arr = emb._encode(["привет мир", "hello world"])
53
+ assert arr is not None
54
+ assert arr.shape == (2, 384)
55
+ assert arr.dtype == np.dtype("float32")
56
+ assert np.allclose(np.linalg.norm(arr, axis=1), 1.0, atol=1e-3)
57
+
58
+
59
+ def test_encode_is_cross_lingual(sem_pkg):
60
+ """A RU/EN translation pair must score higher than an unrelated phrase."""
61
+ from threadkeeper import embeddings as emb
62
+ v = emb._encode(["кошка", "cat", "quarterly financial report"])
63
+ assert float(v[0] @ v[1]) > float(v[0] @ v[2])
64
+
65
+
66
+ def test_embed_tag(sem_pkg):
67
+ from threadkeeper import embeddings as emb
68
+ active = sem_pkg["config"].EMBED_BACKEND
69
+ assert emb.embed_tag(b"\x00\x01") == active
70
+ assert emb.embed_tag(None) is None
71
+
72
+
73
+ # ── write-path tagging ───────────────────────────────────────────────
74
+
75
+ def test_new_note_carries_backend_tag(sem_pkg):
76
+ tid = _tool(sem_pkg, "open_thread")(question="backend tag test")
77
+ _tool(sem_pkg, "note")(thread_id=tid,
78
+ content="tagged note about idempotency keys",
79
+ kind="insight")
80
+ conn = sem_pkg["db"].get_db()
81
+ active = sem_pkg["config"].EMBED_BACKEND
82
+ row = conn.execute(
83
+ "SELECT embedding, embed_backend FROM notes "
84
+ "WHERE thread_id=? ORDER BY id DESC LIMIT 1",
85
+ (tid,),
86
+ ).fetchone()
87
+ assert row["embedding"] is not None
88
+ assert row["embed_backend"] == active
89
+
90
+
91
+ # ── migration ────────────────────────────────────────────────────────
92
+
93
+ def test_migration_recomputes_tags_and_is_idempotent(sem_pkg):
94
+ from threadkeeper import migrate_embeddings as mig
95
+ active = sem_pkg["config"].EMBED_BACKEND
96
+ conn = sem_pkg["db"].get_db()
97
+ _seed_legacy_notes(conn, 3)
98
+
99
+ assert mig._count_stale(conn, "notes", active) == 3
100
+
101
+ rc = mig.run(do_notes=True, do_dialog=False, batch=2,
102
+ dry_run=False, log=lambda _m: None)
103
+ assert rc == 0
104
+ assert mig._count_stale(conn, "notes", active) == 0
105
+ tagged = conn.execute(
106
+ "SELECT COUNT(*) FROM notes WHERE embed_backend=?", (active,)
107
+ ).fetchone()[0]
108
+ assert tagged >= 3
109
+
110
+ # idempotent: a second pass finds nothing stale and changes nothing.
111
+ rc2 = mig.run(do_notes=True, do_dialog=False, batch=2,
112
+ dry_run=False, log=lambda _m: None)
113
+ assert rc2 == 0
114
+ assert mig._count_stale(conn, "notes", active) == 0
115
+
116
+
117
+ def test_migration_dry_run_writes_nothing(sem_pkg):
118
+ from threadkeeper import migrate_embeddings as mig
119
+ active = sem_pkg["config"].EMBED_BACKEND
120
+ conn = sem_pkg["db"].get_db()
121
+ _seed_legacy_notes(conn, 2)
122
+
123
+ assert mig._count_stale(conn, "notes", active) == 2
124
+ mig.run(do_notes=True, do_dialog=False, batch=10,
125
+ dry_run=True, log=lambda _m: None)
126
+ # still stale — dry run must not touch the rows
127
+ assert mig._count_stale(conn, "notes", active) == 2
128
+
129
+
130
+ def test_migration_requires_a_scope_flag(sem_pkg):
131
+ from threadkeeper import migrate_embeddings as mig
132
+ with pytest.raises(SystemExit):
133
+ mig.main([]) # argparse error → SystemExit(2)
@@ -2,6 +2,7 @@
2
2
  Imported wherever a constant or config is needed; cheap to import."""
3
3
  from __future__ import annotations
4
4
 
5
+ import importlib.util
5
6
  import os
6
7
  from pathlib import Path
7
8
  from typing import Optional
@@ -15,6 +16,23 @@ EMBED_MODEL_NAME: str = os.environ.get(
15
16
  "paraphrase-multilingual-MiniLM-L12-v2", # 118 MB, RU+EN cross-lingual
16
17
  )
17
18
 
19
+ # Embedding runtime backend. 'onnx' (default) runs the model through fastembed /
20
+ # ONNX Runtime — no PyTorch, ~700MB footprint (vs ~1.8GB). 'sentence-transformers' is
21
+ # the legacy PyTorch path, kept as an opt-in fallback (install `.[semantic-st]`
22
+ # and set THREADKEEPER_EMBED_BACKEND=sentence-transformers). Both produce the
23
+ # same 384-dim shape, but fastembed's values are numerically NOT identical to ST's,
24
+ # so switching backends warrants a `tk-migrate-embeddings --all` recompute.
25
+ EMBED_BACKEND: str = os.environ.get(
26
+ "THREADKEEPER_EMBED_BACKEND", "onnx"
27
+ ).strip().lower()
28
+
29
+ # fastembed addresses the model under its sentence-transformers org prefix;
30
+ # SentenceTransformer accepts the bare name. Normalize for the ONNX backend.
31
+ FASTEMBED_MODEL_ID: str = (
32
+ EMBED_MODEL_NAME if "/" in EMBED_MODEL_NAME
33
+ else f"sentence-transformers/{EMBED_MODEL_NAME}"
34
+ )
35
+
18
36
  DB_PATH.parent.mkdir(parents=True, exist_ok=True)
19
37
 
20
38
  # One-shot migration from the historical name `memory_partner`. If the new
@@ -52,15 +70,26 @@ NO_EMBEDDINGS: bool = os.environ.get(
52
70
  # Optional semantic search. If the selected embedding backend is not installed OR the
53
71
  # no-embeddings opt-out is set, fall back to FTS5 keyword matching + delegate.
54
72
  # Brief still works either way.
73
+ def _installed(*mods: str) -> bool:
74
+ """True if every module is importable, checked WITHOUT importing it.
75
+
76
+ `find_spec` locates the module via the import machinery but never executes
77
+ it — so probing availability here doesn't pull PyTorch / ONNX Runtime /
78
+ tokenizers (and their thread pools) into every process that imports config.
79
+ The heavy import stays lazy in `embeddings._get_model()`.
80
+ """
81
+ try:
82
+ return all(importlib.util.find_spec(m) is not None for m in mods)
83
+ except (ImportError, ValueError):
84
+ return False
85
+
86
+
55
87
  if NO_EMBEDDINGS:
56
88
  SEMANTIC_AVAILABLE: bool = False
57
- else:
58
- try:
59
- from sentence_transformers import SentenceTransformer # type: ignore # noqa: F401
60
- import numpy as np # type: ignore # noqa: F401
61
- SEMANTIC_AVAILABLE = True
62
- except Exception:
63
- SEMANTIC_AVAILABLE = False
89
+ elif EMBED_BACKEND == "sentence-transformers":
90
+ SEMANTIC_AVAILABLE = _installed("sentence_transformers", "numpy")
91
+ else: # 'onnx' (default)
92
+ SEMANTIC_AVAILABLE = _installed("fastembed", "numpy")
64
93
 
65
94
  # Client label used for `presence`/`sessions` rows.
66
95
  CLIENT_LABEL: str = os.environ.get("THREADKEEPER_CLIENT", "claude")
@@ -72,7 +72,8 @@ CREATE TABLE IF NOT EXISTS notes (
72
72
  kind TEXT NOT NULL,
73
73
  created_at INTEGER NOT NULL,
74
74
  session_id TEXT,
75
- embedding BLOB
75
+ embedding BLOB,
76
+ embed_backend TEXT -- backend that produced `embedding`; NULL = legacy
76
77
  );
77
78
 
78
79
  CREATE TABLE IF NOT EXISTS verbatim (
@@ -143,7 +144,8 @@ CREATE TABLE IF NOT EXISTS dialog_messages (
143
144
  content TEXT NOT NULL, -- concatenated text blocks
144
145
  model TEXT,
145
146
  created_at INTEGER NOT NULL,
146
- embedding BLOB
147
+ embedding BLOB,
148
+ embed_backend TEXT -- backend that produced `embedding`; NULL = legacy
147
149
  );
148
150
 
149
151
  CREATE TABLE IF NOT EXISTS ingest_state (
@@ -500,6 +502,11 @@ def get_db() -> sqlite3.Connection:
500
502
  "ALTER TABLE skill_usage ADD COLUMN wrong_count "
501
503
  "INTEGER NOT NULL DEFAULT 0",
502
504
  "ALTER TABLE skill_usage ADD COLUMN last_wrong_at INTEGER",
505
+ # Embedding backend tag. NULL = legacy (sentence-transformers, pre-ONNX
506
+ # migration). New/recomputed rows carry 'onnx' or 'sentence-transformers'
507
+ # so `tk-migrate-embeddings` can find stale vectors and skip done ones.
508
+ "ALTER TABLE notes ADD COLUMN embed_backend TEXT",
509
+ "ALTER TABLE dialog_messages ADD COLUMN embed_backend TEXT",
503
510
  ):
504
511
  try:
505
512
  conn.execute(ddl)
@@ -17,7 +17,12 @@ import sqlite3
17
17
  import threading
18
18
  from typing import Optional
19
19
 
20
- from .config import SEMANTIC_AVAILABLE, EMBED_MODEL_NAME
20
+ from .config import (
21
+ SEMANTIC_AVAILABLE,
22
+ EMBED_MODEL_NAME,
23
+ EMBED_BACKEND,
24
+ FASTEMBED_MODEL_ID,
25
+ )
21
26
  from . import db as _db
22
27
 
23
28
 
@@ -29,13 +34,22 @@ _model = None
29
34
  _model_lock = threading.RLock()
30
35
 
31
36
  def _get_model():
37
+ """Lazily load and cache the embedding model for the active backend.
38
+
39
+ 'onnx' (default) → fastembed.TextEmbedding (ONNX Runtime, no PyTorch).
40
+ 'sentence-transformers' → the legacy PyTorch path (opt-in fallback).
41
+ """
32
42
  global _model
33
43
  if not SEMANTIC_AVAILABLE:
34
44
  return None
35
45
  with _model_lock:
36
46
  if _model is None:
37
- from sentence_transformers import SentenceTransformer # type: ignore
38
- _model = SentenceTransformer(EMBED_MODEL_NAME)
47
+ if EMBED_BACKEND == "sentence-transformers":
48
+ from sentence_transformers import SentenceTransformer # type: ignore
49
+ _model = SentenceTransformer(EMBED_MODEL_NAME)
50
+ else: # 'onnx' (default)
51
+ from fastembed import TextEmbedding # type: ignore
52
+ _model = TextEmbedding(model_name=FASTEMBED_MODEL_ID)
39
53
  return _model
40
54
 
41
55
 
@@ -66,23 +80,55 @@ def unload_model() -> bool:
66
80
  del model
67
81
  return True
68
82
 
69
- def _embed(text: str) -> Optional[bytes]:
83
+ def _encode(texts: list[str]):
84
+ """Backend-agnostic batch encode → L2-normalized float32 array of shape
85
+ (len(texts), EMBED_DIM), or None when semantic search is unavailable.
86
+
87
+ Both backends' outputs are normalized to unit length here so the dot product used
88
+ by the vec0 and legacy paths equals cosine similarity, regardless of
89
+ whether the backend already normalizes.
90
+ """
70
91
  with _model_lock:
71
92
  m = _get_model()
72
93
  if m is None:
73
94
  return None
74
- v = m.encode([text], normalize_embeddings=True)[0].astype("float32")
75
- return v.tobytes()
95
+ import numpy as np # type: ignore
96
+ if EMBED_BACKEND == "sentence-transformers":
97
+ arr = np.asarray(m.encode(list(texts)), dtype="float32")
98
+ else: # fastembed generator → stack
99
+ arr = np.asarray(list(m.embed(list(texts))), dtype="float32")
100
+ norms = np.linalg.norm(arr, axis=1, keepdims=True)
101
+ norms[norms == 0] = 1.0
102
+ return (arr / norms).astype("float32")
103
+
104
+
105
+ def encode_many(texts: list[str]):
106
+ """Public batch encoder for the migration command. Returns the same
107
+ normalized float32 array as `_encode`, or None when unavailable."""
108
+ return _encode(texts)
109
+
110
+
111
+ def embed_tag(blob: Optional[bytes]) -> Optional[str]:
112
+ """Backend label to store in the `embed_backend` column alongside a freshly
113
+ written embedding blob. None when no embedding was produced, so legacy /
114
+ NULL-vector rows stay untagged."""
115
+ return EMBED_BACKEND if blob is not None else None
116
+
117
+
118
+ def _embed(text: str) -> Optional[bytes]:
119
+ arr = _encode([text])
120
+ if arr is None:
121
+ return None
122
+ return arr[0].astype("float32").tobytes()
76
123
 
77
124
 
78
125
  def _cosine_search(conn: sqlite3.Connection, query: str, k: int) -> list[dict]:
79
126
  """Top-k cosine over notes. Uses vec0 ANN when available."""
80
- with _model_lock:
81
- m = _get_model()
82
- if m is None:
83
- return []
84
- import numpy as np # type: ignore
85
- qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
127
+ import numpy as np # type: ignore
128
+ qa = _encode([query])
129
+ if qa is None:
130
+ return []
131
+ qv = qa[0]
86
132
  if _vec_on():
87
133
  try:
88
134
  return _vec0_notes_search(conn, qv.tobytes(), k)
@@ -131,12 +177,11 @@ def _vec0_notes_search(conn: sqlite3.Connection, qv_blob: bytes,
131
177
 
132
178
  def _dialog_cosine_search(conn, query: str, k: int) -> list[dict]:
133
179
  """Top-k cosine over dialog_messages. Uses vec0 ANN when available."""
134
- with _model_lock:
135
- m = _get_model()
136
- if m is None:
137
- return []
138
- import numpy as np # type: ignore
139
- qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
180
+ import numpy as np # type: ignore
181
+ qa = _encode([query])
182
+ if qa is None:
183
+ return []
184
+ qv = qa[0]
140
185
  if _vec_on():
141
186
  try:
142
187
  return _vec0_dialog_search(conn, qv.tobytes(), k)
@@ -18,7 +18,7 @@ from .config import (
18
18
  SEMANTIC_AVAILABLE,
19
19
  )
20
20
  from .db import get_db
21
- from .embeddings import _embed
21
+ from .embeddings import _embed, embed_tag
22
22
 
23
23
  _ingest_thread: Optional[threading.Thread] = None
24
24
  _ingest_lock = threading.Lock()
@@ -215,11 +215,11 @@ def _ingest_file(conn: sqlite3.Connection, fp: Path, max_msgs: int,
215
215
  emb = _embed(text[:2000]) if SEMANTIC_AVAILABLE else None
216
216
  conn.execute(
217
217
  "INSERT INTO dialog_messages (uuid, source, project, session_id, "
218
- "role, content, model, created_at, embedding) "
219
- "VALUES (?,?,?,?,?,?,?,?,?)",
218
+ "role, content, model, created_at, embedding, embed_backend) "
219
+ "VALUES (?,?,?,?,?,?,?,?,?,?)",
220
220
  (nm.uuid, adapter.name, adapter.project_label(fp),
221
221
  nm.session_id, nm.role, text,
222
- nm.model, nm.created_at, emb)
222
+ nm.model, nm.created_at, emb, embed_tag(emb))
223
223
  )
224
224
  try:
225
225
  conn.execute(
@@ -381,7 +381,7 @@ def _backfill_note_embeddings(conn: sqlite3.Connection, max_n: int = 20) -> int:
381
381
  return 0
382
382
  if not rows:
383
383
  return 0
384
- from .embeddings import _embed, _vec_upsert_note
384
+ from .embeddings import _embed, _vec_upsert_note, embed_tag
385
385
  updated = 0
386
386
  for r in rows:
387
387
  try:
@@ -392,8 +392,8 @@ def _backfill_note_embeddings(conn: sqlite3.Connection, max_n: int = 20) -> int:
392
392
  continue
393
393
  try:
394
394
  conn.execute(
395
- "UPDATE notes SET embedding=? WHERE id=?",
396
- (emb, r["id"]),
395
+ "UPDATE notes SET embedding=?, embed_backend=? WHERE id=?",
396
+ (emb, embed_tag(emb), r["id"]),
397
397
  )
398
398
  _vec_upsert_note(conn, r["id"], emb)
399
399
  updated += 1