threadkeeper 0.6.2__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {threadkeeper-0.6.2/threadkeeper.egg-info → threadkeeper-0.7.0}/PKG-INFO +39 -4
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/README.md +32 -2
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/pyproject.toml +17 -2
- threadkeeper-0.7.0/tests/test_onnx_embeddings.py +133 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/config.py +36 -7
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/db.py +9 -2
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/embeddings.py +63 -18
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/ingest.py +7 -7
- threadkeeper-0.7.0/threadkeeper/migrate_embeddings.py +146 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/consolidate.py +4 -4
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/extract.py +3 -3
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/pickup.py +4 -3
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/session.py +4 -4
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/threads.py +6 -6
- {threadkeeper-0.6.2 → threadkeeper-0.7.0/threadkeeper.egg-info}/PKG-INFO +39 -4
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/SOURCES.txt +2 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/entry_points.txt +1 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/requires.txt +6 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/LICENSE +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/setup.cfg +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_adapters.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_brief_sections.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_candidate_reviewer.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_core_memory.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_curator.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_delegated_search.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_dialectic.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_dialectic_tier.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_error_paths.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_extract_daemon.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_i18n_multilang.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_identity.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_lessons.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_memory_guard.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_missed_spawns.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_nudges.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_process_health.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_shadow_review.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_hint.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_tier.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_use_parser.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skill_watcher.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_skills.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_budget.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_config.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_hint.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_spawn_slim.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_threads.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_tools_smoke.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_validate_threads.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/tests/test_vec_search.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/__init__.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/_mcp.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/_setup.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/__init__.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/_hook_helpers.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/base.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/claude_code.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/claude_desktop.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/codex.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/copilot.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/gemini.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/adapters/vscode.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/brief.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/candidate_reviewer.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/curator.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/extract_daemon.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/helpers.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/i18n.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/identity.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/lessons.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/memory_guard.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/nudges.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/process_health.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/review_prompts.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/search_proxy.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/server.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/shadow_review.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/skill_watcher.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/spawn_budget.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/spawn_config.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/__init__.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/candidate_reviewer.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/concepts.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/core_memory.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/correlation.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/curator.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/dialectic.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/dialog.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/distill.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/graph.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/invariants.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/lessons.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/memory_guard.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/missed_spawns.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/peers.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/probes.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/process_health.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/shadow_review.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/skills.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/spawn.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/style.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper/tools/validate.py +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/dependency_links.txt +0 -0
- {threadkeeper-0.6.2 → threadkeeper-0.7.0}/threadkeeper.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: threadkeeper
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server.
|
|
5
5
|
Author: thread-keeper contributors
|
|
6
6
|
License: MIT
|
|
@@ -24,12 +24,17 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: mcp>=1.0.0
|
|
26
26
|
Provides-Extra: semantic
|
|
27
|
-
Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic"
|
|
27
|
+
Requires-Dist: fastembed>=0.3; extra == "semantic"
|
|
28
28
|
Requires-Dist: numpy>=1.24.0; extra == "semantic"
|
|
29
29
|
Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic"
|
|
30
|
+
Provides-Extra: semantic-st
|
|
31
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic-st"
|
|
32
|
+
Requires-Dist: numpy>=1.24.0; extra == "semantic-st"
|
|
33
|
+
Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic-st"
|
|
30
34
|
Provides-Extra: dev
|
|
31
35
|
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
32
36
|
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-forked>=1.6; extra == "dev"
|
|
33
38
|
Dynamic: license-file
|
|
34
39
|
|
|
35
40
|
# thread-keeper
|
|
@@ -189,7 +194,7 @@ autonomous learning daemons cannot recursively start inside review forks.
|
|
|
189
194
|
A daemon measures combined child RSS every 10 s; admission control
|
|
190
195
|
refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
|
|
191
196
|
(3 GB default). Slim children that need semantic search delegate to the
|
|
192
|
-
parent via `search_via_parent` — no per-child copy of
|
|
197
|
+
parent via `search_via_parent` — no per-child copy of the embedding model.
|
|
193
198
|
|
|
194
199
|
### Learning loops
|
|
195
200
|
|
|
@@ -435,7 +440,9 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
|
|
|
435
440
|
| `THREADKEEPER_MEMORY_GUARD_RETIRE_LIVE` | "" (off) | allow retiring parent-alive MCP servers; off protects live clients |
|
|
436
441
|
| `THREADKEEPER_MEMORY_GUARD_NOTIFY` | "1" | send macOS desktop notification when possible |
|
|
437
442
|
| `THREADKEEPER_INGEST_INTERVAL_S` | 3 | transcript ingest tick (s) |
|
|
438
|
-
| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable
|
|
443
|
+
| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable the embedding model (FTS5 + delegate only) |
|
|
444
|
+
| `THREADKEEPER_EMBED_BACKEND` | `onnx` | embedding runtime: `onnx` (fastembed, no PyTorch) or `sentence-transformers` (legacy fallback) |
|
|
445
|
+
| `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
|
|
439
446
|
| `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
|
|
440
447
|
| `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
|
|
441
448
|
|
|
@@ -525,6 +532,34 @@ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
|
|
|
525
532
|
|
|
526
533
|
---
|
|
527
534
|
|
|
535
|
+
## Embeddings
|
|
536
|
+
|
|
537
|
+
Semantic search runs `paraphrase-multilingual-MiniLM-L12-v2` (384-dim,
|
|
538
|
+
RU+EN+50 langs). The default backend is **fastembed / ONNX Runtime** — no
|
|
539
|
+
PyTorch. A model-loaded process sits at ~700 MB physical footprint
|
|
540
|
+
(~850 MB RSS), down from ~1.8 GB on the PyTorch backend.
|
|
541
|
+
|
|
542
|
+
A **sentence-transformers** (PyTorch) backend is kept as an opt-in fallback.
|
|
543
|
+
It is heavier (~1.8 GB RSS) and produces vectors that are *not numerically
|
|
544
|
+
identical* to the ONNX backend's, so switching backends warrants a recompute:
|
|
545
|
+
|
|
546
|
+
```bash
|
|
547
|
+
# Install the fallback runtime and switch to it:
|
|
548
|
+
pip install -e '.[semantic-st]'
|
|
549
|
+
export THREADKEEPER_EMBED_BACKEND=sentence-transformers
|
|
550
|
+
|
|
551
|
+
# After any backend switch, homogenize the stored corpus so queries and
|
|
552
|
+
# stored vectors live in the same space:
|
|
553
|
+
tk-migrate-embeddings --all # or --notes-only / --dialog-only
|
|
554
|
+
tk-migrate-embeddings --dry-run # report stale counts only
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
The migration is batched, resumable, and idempotent (a second run finds
|
|
558
|
+
nothing stale). Both backends emit 384-dim vectors, so the `vec0` schema is
|
|
559
|
+
unchanged.
|
|
560
|
+
|
|
561
|
+
---
|
|
562
|
+
|
|
528
563
|
## Verifying ingest across CLIs
|
|
529
564
|
|
|
530
565
|
```bash
|
|
@@ -155,7 +155,7 @@ autonomous learning daemons cannot recursively start inside review forks.
|
|
|
155
155
|
A daemon measures combined child RSS every 10 s; admission control
|
|
156
156
|
refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
|
|
157
157
|
(3 GB default). Slim children that need semantic search delegate to the
|
|
158
|
-
parent via `search_via_parent` — no per-child copy of
|
|
158
|
+
parent via `search_via_parent` — no per-child copy of the embedding model.
|
|
159
159
|
|
|
160
160
|
### Learning loops
|
|
161
161
|
|
|
@@ -401,7 +401,9 @@ The most-used env knobs (full list in `threadkeeper/config.py`):
|
|
|
401
401
|
| `THREADKEEPER_MEMORY_GUARD_RETIRE_LIVE` | "" (off) | allow retiring parent-alive MCP servers; off protects live clients |
|
|
402
402
|
| `THREADKEEPER_MEMORY_GUARD_NOTIFY` | "1" | send macOS desktop notification when possible |
|
|
403
403
|
| `THREADKEEPER_INGEST_INTERVAL_S` | 3 | transcript ingest tick (s) |
|
|
404
|
-
| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable
|
|
404
|
+
| `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable the embedding model (FTS5 + delegate only) |
|
|
405
|
+
| `THREADKEEPER_EMBED_BACKEND` | `onnx` | embedding runtime: `onnx` (fastembed, no PyTorch) or `sentence-transformers` (legacy fallback) |
|
|
406
|
+
| `THREADKEEPER_EMBED_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | 384-dim cross-lingual embedding model |
|
|
405
407
|
| `THREADKEEPER_SPAWNED_CHILD` | "" | spawn-internal marker; disables autonomous daemons in children |
|
|
406
408
|
| `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
|
|
407
409
|
|
|
@@ -491,6 +493,34 @@ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
|
|
|
491
493
|
|
|
492
494
|
---
|
|
493
495
|
|
|
496
|
+
## Embeddings
|
|
497
|
+
|
|
498
|
+
Semantic search runs `paraphrase-multilingual-MiniLM-L12-v2` (384-dim,
|
|
499
|
+
RU+EN+50 langs). The default backend is **fastembed / ONNX Runtime** — no
|
|
500
|
+
PyTorch. A model-loaded process sits at ~700 MB physical footprint
|
|
501
|
+
(~850 MB RSS), down from ~1.8 GB on the PyTorch backend.
|
|
502
|
+
|
|
503
|
+
A **sentence-transformers** (PyTorch) backend is kept as an opt-in fallback.
|
|
504
|
+
It is heavier (~1.8 GB RSS) and produces vectors that are *not numerically
|
|
505
|
+
identical* to the ONNX backend's, so switching backends warrants a recompute:
|
|
506
|
+
|
|
507
|
+
```bash
|
|
508
|
+
# Install the fallback runtime and switch to it:
|
|
509
|
+
pip install -e '.[semantic-st]'
|
|
510
|
+
export THREADKEEPER_EMBED_BACKEND=sentence-transformers
|
|
511
|
+
|
|
512
|
+
# After any backend switch, homogenize the stored corpus so queries and
|
|
513
|
+
# stored vectors live in the same space:
|
|
514
|
+
tk-migrate-embeddings --all # or --notes-only / --dialog-only
|
|
515
|
+
tk-migrate-embeddings --dry-run # report stale counts only
|
|
516
|
+
```
|
|
517
|
+
|
|
518
|
+
The migration is batched, resumable, and idempotent (a second run finds
|
|
519
|
+
nothing stale). Both backends emit 384-dim vectors, so the `vec0` schema is
|
|
520
|
+
unchanged.
|
|
521
|
+
|
|
522
|
+
---
|
|
523
|
+
|
|
494
524
|
## Verifying ingest across CLIs
|
|
495
525
|
|
|
496
526
|
```bash
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "threadkeeper"
|
|
7
|
-
version = "0.6.2"
|
|
7
|
+
version = "0.7.0"
|
|
8
8
|
description = "Multi-agent shared brain across Claude Code/Desktop, Codex, Gemini, Copilot, VS Code. Cross-session memory, self-improving skill loops, inter-agent signaling — one local MCP server."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
authors = [{ name = "thread-keeper contributors" }]
|
|
@@ -32,15 +32,27 @@ dependencies = [
|
|
|
32
32
|
[project.optional-dependencies]
|
|
33
33
|
# Semantic cross-language search + sub-linear vector index. Recommended
|
|
34
34
|
# for any real use — without it, dialog_search falls back to FTS5 only.
|
|
35
|
+
# Default backend is fastembed/ONNX Runtime: no PyTorch, ~700MB footprint.
|
|
35
36
|
semantic = [
|
|
37
|
+
"fastembed>=0.3",
|
|
38
|
+
"numpy>=1.24.0",
|
|
39
|
+
"sqlite-vec>=0.1.9",
|
|
40
|
+
]
|
|
41
|
+
# Legacy PyTorch backend, kept as an opt-in fallback. Install this AND set
|
|
42
|
+
# THREADKEEPER_EMBED_BACKEND=sentence-transformers to use it. ~1.8GB RSS.
|
|
43
|
+
semantic-st = [
|
|
36
44
|
"sentence-transformers>=2.2.0",
|
|
37
45
|
"numpy>=1.24.0",
|
|
38
46
|
"sqlite-vec>=0.1.9",
|
|
39
47
|
]
|
|
40
|
-
# Test runner + coverage.
|
|
48
|
+
# Test runner + coverage. pytest-forked isolates each test in its own
|
|
49
|
+
# process: the per-test package re-import (tests/conftest.py) accumulates
|
|
50
|
+
# native ONNX/tokenizer thread pools that can deadlock sqlite finalize in a
|
|
51
|
+
# single long-lived process, so CI runs `pytest --forked`.
|
|
41
52
|
dev = [
|
|
42
53
|
"pytest>=8.0",
|
|
43
54
|
"pytest-cov>=5.0",
|
|
55
|
+
"pytest-forked>=1.6",
|
|
44
56
|
]
|
|
45
57
|
|
|
46
58
|
[project.urls]
|
|
@@ -54,6 +66,9 @@ Changelog = "https://github.com/po4erk91/thread-keeper/releases"
|
|
|
54
66
|
# After `pip install threadkeeper`, the user gets `thread-keeper-setup`
|
|
55
67
|
# directly on PATH. Equivalent to `python -m threadkeeper._setup`.
|
|
56
68
|
thread-keeper-setup = "threadkeeper._setup:main"
|
|
69
|
+
# Recompute stored embeddings with the active backend (e.g. after switching to
|
|
70
|
+
# the ONNX default). Equivalent to `python -m threadkeeper.migrate_embeddings`.
|
|
71
|
+
tk-migrate-embeddings = "threadkeeper.migrate_embeddings:main"
|
|
57
72
|
|
|
58
73
|
[tool.setuptools.packages.find]
|
|
59
74
|
include = ["threadkeeper*"]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""ONNX embedding backend + tk-migrate-embeddings.
|
|
2
|
+
|
|
3
|
+
Verifies that:
|
|
4
|
+
- the active backend encodes to L2-normalized 384-dim float32 vectors
|
|
5
|
+
- embed_tag stamps the active backend for a real blob, None otherwise
|
|
6
|
+
- freshly inserted notes carry the embed_backend tag
|
|
7
|
+
- the migration recomputes stale (NULL-tagged) rows, tags them, and is
|
|
8
|
+
idempotent + dry-run-safe
|
|
9
|
+
|
|
10
|
+
Skips entirely when no embedding backend is installed.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import time
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
pytestmark = pytest.mark.slow # model warmup on first encode
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _tool(pkg, name):
|
|
22
|
+
return pkg["mcp"]._tool_manager._tools[name].fn
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture()
|
|
26
|
+
def sem_pkg(fresh_mp):
|
|
27
|
+
"""Fresh package against a clean tmp DB; skip if semantic search is off."""
|
|
28
|
+
if not fresh_mp["config"].SEMANTIC_AVAILABLE:
|
|
29
|
+
pytest.skip("no embedding backend installed in this environment")
|
|
30
|
+
return fresh_mp
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _seed_legacy_notes(conn, n: int):
|
|
34
|
+
"""Insert n notes with a real embedding blob but a NULL backend tag,
|
|
35
|
+
simulating rows written before the ONNX migration."""
|
|
36
|
+
from threadkeeper import embeddings as emb
|
|
37
|
+
for i in range(n):
|
|
38
|
+
blob = emb._embed(f"legacy seeded note {i} about webhooks and retries")
|
|
39
|
+
conn.execute(
|
|
40
|
+
"INSERT INTO notes (content, kind, created_at, embedding, embed_backend) "
|
|
41
|
+
"VALUES (?,?,?,?,NULL)",
|
|
42
|
+
(f"legacy seeded note {i}", "insight", int(time.time()), blob),
|
|
43
|
+
)
|
|
44
|
+
conn.commit()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ── encode primitives ────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
def test_encode_is_normalized_384_float32(sem_pkg):
|
|
50
|
+
import numpy as np
|
|
51
|
+
from threadkeeper import embeddings as emb
|
|
52
|
+
arr = emb._encode(["привет мир", "hello world"])
|
|
53
|
+
assert arr is not None
|
|
54
|
+
assert arr.shape == (2, 384)
|
|
55
|
+
assert arr.dtype == np.dtype("float32")
|
|
56
|
+
assert np.allclose(np.linalg.norm(arr, axis=1), 1.0, atol=1e-3)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_encode_is_cross_lingual(sem_pkg):
|
|
60
|
+
"""A RU/EN translation pair must score higher than an unrelated phrase."""
|
|
61
|
+
from threadkeeper import embeddings as emb
|
|
62
|
+
v = emb._encode(["кошка", "cat", "quarterly financial report"])
|
|
63
|
+
assert float(v[0] @ v[1]) > float(v[0] @ v[2])
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_embed_tag(sem_pkg):
|
|
67
|
+
from threadkeeper import embeddings as emb
|
|
68
|
+
active = sem_pkg["config"].EMBED_BACKEND
|
|
69
|
+
assert emb.embed_tag(b"\x00\x01") == active
|
|
70
|
+
assert emb.embed_tag(None) is None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ── write-path tagging ───────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
def test_new_note_carries_backend_tag(sem_pkg):
|
|
76
|
+
tid = _tool(sem_pkg, "open_thread")(question="backend tag test")
|
|
77
|
+
_tool(sem_pkg, "note")(thread_id=tid,
|
|
78
|
+
content="tagged note about idempotency keys",
|
|
79
|
+
kind="insight")
|
|
80
|
+
conn = sem_pkg["db"].get_db()
|
|
81
|
+
active = sem_pkg["config"].EMBED_BACKEND
|
|
82
|
+
row = conn.execute(
|
|
83
|
+
"SELECT embedding, embed_backend FROM notes "
|
|
84
|
+
"WHERE thread_id=? ORDER BY id DESC LIMIT 1",
|
|
85
|
+
(tid,),
|
|
86
|
+
).fetchone()
|
|
87
|
+
assert row["embedding"] is not None
|
|
88
|
+
assert row["embed_backend"] == active
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ── migration ────────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
def test_migration_recomputes_tags_and_is_idempotent(sem_pkg):
|
|
94
|
+
from threadkeeper import migrate_embeddings as mig
|
|
95
|
+
active = sem_pkg["config"].EMBED_BACKEND
|
|
96
|
+
conn = sem_pkg["db"].get_db()
|
|
97
|
+
_seed_legacy_notes(conn, 3)
|
|
98
|
+
|
|
99
|
+
assert mig._count_stale(conn, "notes", active) == 3
|
|
100
|
+
|
|
101
|
+
rc = mig.run(do_notes=True, do_dialog=False, batch=2,
|
|
102
|
+
dry_run=False, log=lambda _m: None)
|
|
103
|
+
assert rc == 0
|
|
104
|
+
assert mig._count_stale(conn, "notes", active) == 0
|
|
105
|
+
tagged = conn.execute(
|
|
106
|
+
"SELECT COUNT(*) FROM notes WHERE embed_backend=?", (active,)
|
|
107
|
+
).fetchone()[0]
|
|
108
|
+
assert tagged >= 3
|
|
109
|
+
|
|
110
|
+
# idempotent: a second pass finds nothing stale and changes nothing.
|
|
111
|
+
rc2 = mig.run(do_notes=True, do_dialog=False, batch=2,
|
|
112
|
+
dry_run=False, log=lambda _m: None)
|
|
113
|
+
assert rc2 == 0
|
|
114
|
+
assert mig._count_stale(conn, "notes", active) == 0
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_migration_dry_run_writes_nothing(sem_pkg):
|
|
118
|
+
from threadkeeper import migrate_embeddings as mig
|
|
119
|
+
active = sem_pkg["config"].EMBED_BACKEND
|
|
120
|
+
conn = sem_pkg["db"].get_db()
|
|
121
|
+
_seed_legacy_notes(conn, 2)
|
|
122
|
+
|
|
123
|
+
assert mig._count_stale(conn, "notes", active) == 2
|
|
124
|
+
mig.run(do_notes=True, do_dialog=False, batch=10,
|
|
125
|
+
dry_run=True, log=lambda _m: None)
|
|
126
|
+
# still stale — dry run must not touch the rows
|
|
127
|
+
assert mig._count_stale(conn, "notes", active) == 2
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_migration_requires_a_scope_flag(sem_pkg):
|
|
131
|
+
from threadkeeper import migrate_embeddings as mig
|
|
132
|
+
with pytest.raises(SystemExit):
|
|
133
|
+
mig.main([]) # argparse error → SystemExit(2)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Imported wherever a constant or config is needed; cheap to import."""
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import importlib.util
|
|
5
6
|
import os
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Optional
|
|
@@ -15,6 +16,23 @@ EMBED_MODEL_NAME: str = os.environ.get(
|
|
|
15
16
|
"paraphrase-multilingual-MiniLM-L12-v2", # 118 MB, RU+EN cross-lingual
|
|
16
17
|
)
|
|
17
18
|
|
|
19
|
+
# Embedding runtime backend. 'onnx' (default) runs the model through fastembed /
|
|
20
|
+
# ONNX Runtime — no PyTorch, ~700MB footprint (vs ~1.8GB). 'sentence-transformers' is
|
|
21
|
+
# the legacy PyTorch path, kept as an opt-in fallback (install `.[semantic-st]`
|
|
22
|
+
# and set THREADKEEPER_EMBED_BACKEND=sentence-transformers). Both produce the
|
|
23
|
+
# same 384-dim vectors, but fastembed's are numerically NOT identical to ST's,
|
|
24
|
+
# so switching backends warrants a `tk-migrate-embeddings --all` recompute.
|
|
25
|
+
EMBED_BACKEND: str = os.environ.get(
|
|
26
|
+
"THREADKEEPER_EMBED_BACKEND", "onnx"
|
|
27
|
+
).strip().lower()
|
|
28
|
+
|
|
29
|
+
# fastembed addresses the model under its sentence-transformers org prefix;
|
|
30
|
+
# SentenceTransformer accepts the bare name. Normalize for the ONNX backend.
|
|
31
|
+
FASTEMBED_MODEL_ID: str = (
|
|
32
|
+
EMBED_MODEL_NAME if "/" in EMBED_MODEL_NAME
|
|
33
|
+
else f"sentence-transformers/{EMBED_MODEL_NAME}"
|
|
34
|
+
)
|
|
35
|
+
|
|
18
36
|
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
19
37
|
|
|
20
38
|
# One-shot migration from the historical name `memory_partner`. If the new
|
|
@@ -52,15 +70,26 @@ NO_EMBEDDINGS: bool = os.environ.get(
|
|
|
52
70
|
# Optional semantic search. If sentence-transformers is not installed OR the
|
|
53
71
|
# no-embeddings opt-out is set, fall back to FTS5 keyword matching + delegate.
|
|
54
72
|
# Brief still works either way.
|
|
73
|
+
def _installed(*mods: str) -> bool:
|
|
74
|
+
"""True if every module is importable, checked WITHOUT importing it.
|
|
75
|
+
|
|
76
|
+
`find_spec` locates the module via the import machinery but never executes
|
|
77
|
+
it — so probing availability here doesn't pull PyTorch / ONNX Runtime /
|
|
78
|
+
tokenizers (and their thread pools) into every process that imports config.
|
|
79
|
+
The heavy import stays lazy in `embeddings._get_model()`.
|
|
80
|
+
"""
|
|
81
|
+
try:
|
|
82
|
+
return all(importlib.util.find_spec(m) is not None for m in mods)
|
|
83
|
+
except (ImportError, ValueError):
|
|
84
|
+
return False
|
|
85
|
+
|
|
86
|
+
|
|
55
87
|
if NO_EMBEDDINGS:
|
|
56
88
|
SEMANTIC_AVAILABLE: bool = False
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
SEMANTIC_AVAILABLE = True
|
|
62
|
-
except Exception:
|
|
63
|
-
SEMANTIC_AVAILABLE = False
|
|
89
|
+
elif EMBED_BACKEND == "sentence-transformers":
|
|
90
|
+
SEMANTIC_AVAILABLE = _installed("sentence_transformers", "numpy")
|
|
91
|
+
else: # 'onnx' (default)
|
|
92
|
+
SEMANTIC_AVAILABLE = _installed("fastembed", "numpy")
|
|
64
93
|
|
|
65
94
|
# Client label used for `presence`/`sessions` rows.
|
|
66
95
|
CLIENT_LABEL: str = os.environ.get("THREADKEEPER_CLIENT", "claude")
|
|
@@ -72,7 +72,8 @@ CREATE TABLE IF NOT EXISTS notes (
|
|
|
72
72
|
kind TEXT NOT NULL,
|
|
73
73
|
created_at INTEGER NOT NULL,
|
|
74
74
|
session_id TEXT,
|
|
75
|
-
embedding BLOB
|
|
75
|
+
embedding BLOB,
|
|
76
|
+
embed_backend TEXT -- backend that produced `embedding`; NULL = legacy
|
|
76
77
|
);
|
|
77
78
|
|
|
78
79
|
CREATE TABLE IF NOT EXISTS verbatim (
|
|
@@ -143,7 +144,8 @@ CREATE TABLE IF NOT EXISTS dialog_messages (
|
|
|
143
144
|
content TEXT NOT NULL, -- concatenated text blocks
|
|
144
145
|
model TEXT,
|
|
145
146
|
created_at INTEGER NOT NULL,
|
|
146
|
-
embedding BLOB
|
|
147
|
+
embedding BLOB,
|
|
148
|
+
embed_backend TEXT -- backend that produced `embedding`; NULL = legacy
|
|
147
149
|
);
|
|
148
150
|
|
|
149
151
|
CREATE TABLE IF NOT EXISTS ingest_state (
|
|
@@ -500,6 +502,11 @@ def get_db() -> sqlite3.Connection:
|
|
|
500
502
|
"ALTER TABLE skill_usage ADD COLUMN wrong_count "
|
|
501
503
|
"INTEGER NOT NULL DEFAULT 0",
|
|
502
504
|
"ALTER TABLE skill_usage ADD COLUMN last_wrong_at INTEGER",
|
|
505
|
+
# Embedding backend tag. NULL = legacy (sentence-transformers, pre-ONNX
|
|
506
|
+
# migration). New/recomputed rows carry 'onnx' or 'sentence-transformers'
|
|
507
|
+
# so `tk-migrate-embeddings` can find stale vectors and skip done ones.
|
|
508
|
+
"ALTER TABLE notes ADD COLUMN embed_backend TEXT",
|
|
509
|
+
"ALTER TABLE dialog_messages ADD COLUMN embed_backend TEXT",
|
|
503
510
|
):
|
|
504
511
|
try:
|
|
505
512
|
conn.execute(ddl)
|
|
@@ -17,7 +17,12 @@ import sqlite3
|
|
|
17
17
|
import threading
|
|
18
18
|
from typing import Optional
|
|
19
19
|
|
|
20
|
-
from .config import
|
|
20
|
+
from .config import (
|
|
21
|
+
SEMANTIC_AVAILABLE,
|
|
22
|
+
EMBED_MODEL_NAME,
|
|
23
|
+
EMBED_BACKEND,
|
|
24
|
+
FASTEMBED_MODEL_ID,
|
|
25
|
+
)
|
|
21
26
|
from . import db as _db
|
|
22
27
|
|
|
23
28
|
|
|
@@ -29,13 +34,22 @@ _model = None
|
|
|
29
34
|
_model_lock = threading.RLock()
|
|
30
35
|
|
|
31
36
|
def _get_model():
|
|
37
|
+
"""Lazily load and cache the embedding model for the active backend.
|
|
38
|
+
|
|
39
|
+
'onnx' (default) → fastembed.TextEmbedding (ONNX Runtime, no PyTorch).
|
|
40
|
+
'sentence-transformers' → the legacy PyTorch path (opt-in fallback).
|
|
41
|
+
"""
|
|
32
42
|
global _model
|
|
33
43
|
if not SEMANTIC_AVAILABLE:
|
|
34
44
|
return None
|
|
35
45
|
with _model_lock:
|
|
36
46
|
if _model is None:
|
|
37
|
-
|
|
38
|
-
|
|
47
|
+
if EMBED_BACKEND == "sentence-transformers":
|
|
48
|
+
from sentence_transformers import SentenceTransformer # type: ignore
|
|
49
|
+
_model = SentenceTransformer(EMBED_MODEL_NAME)
|
|
50
|
+
else: # 'onnx' (default)
|
|
51
|
+
from fastembed import TextEmbedding # type: ignore
|
|
52
|
+
_model = TextEmbedding(model_name=FASTEMBED_MODEL_ID)
|
|
39
53
|
return _model
|
|
40
54
|
|
|
41
55
|
|
|
@@ -66,23 +80,55 @@ def unload_model() -> bool:
|
|
|
66
80
|
del model
|
|
67
81
|
return True
|
|
68
82
|
|
|
69
|
-
def
|
|
83
|
+
def _encode(texts: list[str]):
|
|
84
|
+
"""Backend-agnostic batch encode → L2-normalized float32 array of shape
|
|
85
|
+
(len(texts), EMBED_DIM), or None when semantic search is unavailable.
|
|
86
|
+
|
|
87
|
+
Both backends are normalized to unit length here so the dot product used
|
|
88
|
+
by the vec0 and legacy paths equals cosine similarity, regardless of
|
|
89
|
+
whether the backend already normalizes.
|
|
90
|
+
"""
|
|
70
91
|
with _model_lock:
|
|
71
92
|
m = _get_model()
|
|
72
93
|
if m is None:
|
|
73
94
|
return None
|
|
74
|
-
|
|
75
|
-
|
|
95
|
+
import numpy as np # type: ignore
|
|
96
|
+
if EMBED_BACKEND == "sentence-transformers":
|
|
97
|
+
arr = np.asarray(m.encode(list(texts)), dtype="float32")
|
|
98
|
+
else: # fastembed generator → stack
|
|
99
|
+
arr = np.asarray(list(m.embed(list(texts))), dtype="float32")
|
|
100
|
+
norms = np.linalg.norm(arr, axis=1, keepdims=True)
|
|
101
|
+
norms[norms == 0] = 1.0
|
|
102
|
+
return (arr / norms).astype("float32")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def encode_many(texts: list[str]):
|
|
106
|
+
"""Public batch encoder for the migration command. Returns the same
|
|
107
|
+
normalized float32 array as `_encode`, or None when unavailable."""
|
|
108
|
+
return _encode(texts)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def embed_tag(blob: Optional[bytes]) -> Optional[str]:
|
|
112
|
+
"""Backend label to store in the `embed_backend` column alongside a freshly
|
|
113
|
+
written embedding blob. None when no embedding was produced, so legacy /
|
|
114
|
+
NULL-vector rows stay untagged."""
|
|
115
|
+
return EMBED_BACKEND if blob is not None else None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _embed(text: str) -> Optional[bytes]:
|
|
119
|
+
arr = _encode([text])
|
|
120
|
+
if arr is None:
|
|
121
|
+
return None
|
|
122
|
+
return arr[0].astype("float32").tobytes()
|
|
76
123
|
|
|
77
124
|
|
|
78
125
|
def _cosine_search(conn: sqlite3.Connection, query: str, k: int) -> list[dict]:
|
|
79
126
|
"""Top-k cosine over notes. Uses vec0 ANN when available."""
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
|
|
127
|
+
import numpy as np # type: ignore
|
|
128
|
+
qa = _encode([query])
|
|
129
|
+
if qa is None:
|
|
130
|
+
return []
|
|
131
|
+
qv = qa[0]
|
|
86
132
|
if _vec_on():
|
|
87
133
|
try:
|
|
88
134
|
return _vec0_notes_search(conn, qv.tobytes(), k)
|
|
@@ -131,12 +177,11 @@ def _vec0_notes_search(conn: sqlite3.Connection, qv_blob: bytes,
|
|
|
131
177
|
|
|
132
178
|
def _dialog_cosine_search(conn, query: str, k: int) -> list[dict]:
|
|
133
179
|
"""Top-k cosine over dialog_messages. Uses vec0 ANN when available."""
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
qv = m.encode([query], normalize_embeddings=True)[0].astype("float32")
|
|
180
|
+
import numpy as np # type: ignore
|
|
181
|
+
qa = _encode([query])
|
|
182
|
+
if qa is None:
|
|
183
|
+
return []
|
|
184
|
+
qv = qa[0]
|
|
140
185
|
if _vec_on():
|
|
141
186
|
try:
|
|
142
187
|
return _vec0_dialog_search(conn, qv.tobytes(), k)
|
|
@@ -18,7 +18,7 @@ from .config import (
|
|
|
18
18
|
SEMANTIC_AVAILABLE,
|
|
19
19
|
)
|
|
20
20
|
from .db import get_db
|
|
21
|
-
from .embeddings import _embed
|
|
21
|
+
from .embeddings import _embed, embed_tag
|
|
22
22
|
|
|
23
23
|
_ingest_thread: Optional[threading.Thread] = None
|
|
24
24
|
_ingest_lock = threading.Lock()
|
|
@@ -215,11 +215,11 @@ def _ingest_file(conn: sqlite3.Connection, fp: Path, max_msgs: int,
|
|
|
215
215
|
emb = _embed(text[:2000]) if SEMANTIC_AVAILABLE else None
|
|
216
216
|
conn.execute(
|
|
217
217
|
"INSERT INTO dialog_messages (uuid, source, project, session_id, "
|
|
218
|
-
"role, content, model, created_at, embedding) "
|
|
219
|
-
"VALUES (
|
|
218
|
+
"role, content, model, created_at, embedding, embed_backend) "
|
|
219
|
+
"VALUES (?,?,?,?,?,?,?,?,?,?)",
|
|
220
220
|
(nm.uuid, adapter.name, adapter.project_label(fp),
|
|
221
221
|
nm.session_id, nm.role, text,
|
|
222
|
-
nm.model, nm.created_at, emb)
|
|
222
|
+
nm.model, nm.created_at, emb, embed_tag(emb))
|
|
223
223
|
)
|
|
224
224
|
try:
|
|
225
225
|
conn.execute(
|
|
@@ -381,7 +381,7 @@ def _backfill_note_embeddings(conn: sqlite3.Connection, max_n: int = 20) -> int:
|
|
|
381
381
|
return 0
|
|
382
382
|
if not rows:
|
|
383
383
|
return 0
|
|
384
|
-
from .embeddings import _embed, _vec_upsert_note
|
|
384
|
+
from .embeddings import _embed, _vec_upsert_note, embed_tag
|
|
385
385
|
updated = 0
|
|
386
386
|
for r in rows:
|
|
387
387
|
try:
|
|
@@ -392,8 +392,8 @@ def _backfill_note_embeddings(conn: sqlite3.Connection, max_n: int = 20) -> int:
|
|
|
392
392
|
continue
|
|
393
393
|
try:
|
|
394
394
|
conn.execute(
|
|
395
|
-
"UPDATE notes SET embedding=? WHERE id=?",
|
|
396
|
-
(emb, r["id"]),
|
|
395
|
+
"UPDATE notes SET embedding=?, embed_backend=? WHERE id=?",
|
|
396
|
+
(emb, embed_tag(emb), r["id"]),
|
|
397
397
|
)
|
|
398
398
|
_vec_upsert_note(conn, r["id"], emb)
|
|
399
399
|
updated += 1
|