threadkeeper 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- threadkeeper/__init__.py +8 -0
- threadkeeper/_mcp.py +6 -0
- threadkeeper/_setup.py +299 -0
- threadkeeper/adapters/__init__.py +40 -0
- threadkeeper/adapters/_hook_helpers.py +72 -0
- threadkeeper/adapters/base.py +152 -0
- threadkeeper/adapters/claude_code.py +178 -0
- threadkeeper/adapters/claude_desktop.py +128 -0
- threadkeeper/adapters/codex.py +259 -0
- threadkeeper/adapters/copilot.py +195 -0
- threadkeeper/adapters/gemini.py +169 -0
- threadkeeper/adapters/vscode.py +144 -0
- threadkeeper/brief.py +735 -0
- threadkeeper/config.py +216 -0
- threadkeeper/curator.py +390 -0
- threadkeeper/db.py +474 -0
- threadkeeper/embeddings.py +232 -0
- threadkeeper/extract_daemon.py +125 -0
- threadkeeper/helpers.py +101 -0
- threadkeeper/i18n.py +342 -0
- threadkeeper/identity.py +237 -0
- threadkeeper/ingest.py +507 -0
- threadkeeper/lessons.py +170 -0
- threadkeeper/nudges.py +257 -0
- threadkeeper/process_health.py +202 -0
- threadkeeper/review_prompts.py +207 -0
- threadkeeper/search_proxy.py +160 -0
- threadkeeper/server.py +55 -0
- threadkeeper/shadow_review.py +358 -0
- threadkeeper/skill_watcher.py +96 -0
- threadkeeper/spawn_budget.py +246 -0
- threadkeeper/tools/__init__.py +2 -0
- threadkeeper/tools/concepts.py +111 -0
- threadkeeper/tools/consolidate.py +222 -0
- threadkeeper/tools/core_memory.py +109 -0
- threadkeeper/tools/correlation.py +116 -0
- threadkeeper/tools/curator.py +121 -0
- threadkeeper/tools/dialectic.py +359 -0
- threadkeeper/tools/dialog.py +131 -0
- threadkeeper/tools/distill.py +184 -0
- threadkeeper/tools/extract.py +411 -0
- threadkeeper/tools/graph.py +183 -0
- threadkeeper/tools/invariants.py +177 -0
- threadkeeper/tools/lessons.py +110 -0
- threadkeeper/tools/missed_spawns.py +142 -0
- threadkeeper/tools/peers.py +579 -0
- threadkeeper/tools/pickup.py +148 -0
- threadkeeper/tools/probes.py +251 -0
- threadkeeper/tools/process_health.py +90 -0
- threadkeeper/tools/session.py +34 -0
- threadkeeper/tools/shadow_review.py +106 -0
- threadkeeper/tools/skills.py +856 -0
- threadkeeper/tools/spawn.py +871 -0
- threadkeeper/tools/style.py +44 -0
- threadkeeper/tools/threads.py +299 -0
- threadkeeper-0.4.0.dist-info/METADATA +351 -0
- threadkeeper-0.4.0.dist-info/RECORD +61 -0
- threadkeeper-0.4.0.dist-info/WHEEL +5 -0
- threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
- threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
- threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
threadkeeper/i18n.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
"""Localized regex patterns and prompt bundles.
|
|
2
|
+
|
|
3
|
+
This module is the SINGLE PLACE where non-English vocabulary lives in
|
|
4
|
+
the thread-keeper codebase. Other modules import named constants from
|
|
5
|
+
here and don't carry locale strings of their own.
|
|
6
|
+
|
|
7
|
+
Why centralize: thread-keeper is built CLI-agnostic and aims to be
|
|
8
|
+
language-agnostic too. The user may write in English, Russian, Spanish,
|
|
9
|
+
Mandarin, … — the agent has to recognize the same intents regardless.
|
|
10
|
+
By keeping all multilingual vocabulary in one named bundle:
|
|
11
|
+
* The rest of the codebase stays readable English-only.
|
|
12
|
+
* Adding a new language = appending a new section here; no edits to
|
|
13
|
+
brief.py / extract.py / shadow_review.py.
|
|
14
|
+
* Audits for accidental non-English literals only have to whitelist
|
|
15
|
+
this file.
|
|
16
|
+
|
|
17
|
+
Supported locales (in order of speaker count):
|
|
18
|
+
English, Mandarin Chinese, Hindi, Spanish, French, Arabic, Russian,
|
|
19
|
+
Portuguese, German, Japanese.
|
|
20
|
+
|
|
21
|
+
Notes on regex boundaries across scripts:
|
|
22
|
+
* Latin and Cyrillic words use ASCII `\\b` cleanly.
|
|
23
|
+
* Devanagari (Hindi) and Arabic letters are word chars for `\\b`, but
|
|
24
|
+
their combining vowel signs / diacritics (categories Mn/Mc) are not,
|
|
25
|
+
so `\\b` can fail right after a word ending in one; use `(?!\\w)`.
|
|
26
|
+
* Mandarin has no inter-character word boundary in regex terms, so
|
|
27
|
+
its tokens are matched as literals with optional `(?<!\\w)` /
|
|
28
|
+
`(?!\\w)` lookaround instead of `\\b`. The patterns below use bare
|
|
29
|
+
literals — false positives are practically impossible because
|
|
30
|
+
Han characters don't occur inside other-language vocabulary.
|
|
31
|
+
"""
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import re
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
38
|
+
# Parallel-work cues (brief.py SPAWN_CUE)
|
|
39
|
+
# Three families, each cross-language:
|
|
40
|
+
# (a) explicit parallel vocabulary
|
|
41
|
+
# (b) count + plural-noun — count and noun may come from different
|
|
42
|
+
# languages ("2 вопроса", "三 questions"). Kept as ONE combined
|
|
43
|
+
# alternation so cross-mix matches.
|
|
44
|
+
# (c) second-or-later numbered list item ("2.", "3)")
|
|
45
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
# Parallel-vocabulary alternation, split by script. The Latin/Cyrillic/
# Devanagari/Arabic family is anchored with a leading \b and a trailing
# (?!\w) in the final regex. The trailing anchor is a lookahead rather
# than \b because Python's \w excludes combining marks (Devanagari vowel
# signs, anusvara): a phrase ending in one would need a letter right
# after it for \b to hold, so it could never match before a space or at
# end of text. Han (Mandarin) is matched as bare literals because \b
# never triggers between two CJK characters in Python's re engine.
_PARALLEL_WORDS_BOUNDED = (
    # English
    r"in\s+parallel|while\s+you|simultaneously|meanwhile|"
    r"in\s+the\s+background|fork\b"
    # Spanish
    r"|en\s+paralelo|simult[áa]neamente|mientras|al\s+mismo\s+tiempo|"
    r"en\s+segundo\s+plano"
    # Portuguese
    r"|em\s+paralelo|simultaneamente|enquanto|ao\s+mesmo\s+tempo|"
    r"em\s+segundo\s+plano"
    # French
    r"|en\s+parall[èe]le|simultan[ée]ment|pendant\s+que|"
    r"en\s+m[êe]me\s+temps|en\s+arri[èe]re-plan"
    # German
    r"|parallel|gleichzeitig|w[äa]hrenddessen|"
    r"zur\s+gleichen\s+zeit|im\s+hintergrund"
    # Russian
    r"|параллельн\w*|одновременн\w*|в\s+то\s+время\s+как|пока\s+ты|"
    r"заодно|в\s+фоне|многопоточн\w*"
    # Hindi (Devanagari)
    r"|समानांतर|एक\s+साथ|साथ\s+ही|पृष्ठभूमि\s+में"
    # Arabic
    r"|بالتوازي|في\s+نفس\s+الوقت|بالخلفية|متزامن\w*"
)
# CJK family: Mandarin + Japanese share no-whitespace word boundaries
# and need to be matched as bare literals (no \b).
_PARALLEL_WORDS_CJK = (
    # Mandarin
    r"并行|同时|与此同时|在后台|后台运行"
    # Japanese
    r"|並行で|並列で|同時に|バックグラウンドで|裏で"
)
_COUNT_WORDS = (
    r"[2-9]"  # digits cross-language
    r"|two|three|four|five|several|multiple"
    r"|dos|tres|cuatro|cinco|varios|varias|m[úu]ltiples"
    r"|dois|duas|tr[êe]s|quatro|cinco|v[áa]rios|v[áa]rias|m[úu]ltiplos"
    r"|deux|trois|quatre|cinq|plusieurs|multiples"
    r"|zwei|drei|vier|f[üu]nf|mehrere|mehrfach"
    r"|две|двух|три|трёх|трех|четыре|четырёх|пять"
    r"|दो|तीन|चार|पाँच|पांच|कई|कुछ"
    r"|اثنان|اثنين|ثلاث\w*|أربع\w*|خمس\w*|عدة"
    r"|两|三|四|五|几|多个|多项"
    r"|二つ|三つ|四つ|五つ|複数の?|いくつかの?"
)
_PLURAL_NOUNS = (
    # English
    r"things?|tasks?|questions?|items?|steps?|topics?|points?|"
    r"problems?|reasons?|options?"
    # Spanish
    r"|cosas|tareas|preguntas|pasos|temas|problemas|razones|opciones"
    # Portuguese
    r"|coisas|tarefas|perguntas|passos|etapas|t[óo]picos|pontos|"
    r"problemas|raz[õo]es|op[çc][õo]es"
    # French
    r"|choses|t[âa]ches|questions|[ée]tapes|sujets|probl[èe]mes|"
    r"raisons|options"
    # German
    r"|sachen|aufgaben|fragen|schritte|themen|punkte|"
    r"probleme|gr[üu]nde|optionen"
    # Russian
    r"|вопрос\w*|задач\w*|шаг\w*|пункт\w*|штук\w*|тем\w*|причин\w*|"
    r"варианта?|вариант\w*"
    # Hindi
    r"|काम|चीज़ें|सवाल|मुद्दे|कारण|विकल्प"
    # Arabic
    r"|أشياء|مهام|أسئلة|خطوات|نقاط|أسباب|خيارات"
    # Mandarin
    r"|件事|任务|问题|步骤|项目|方面|原因|选项"
    # Japanese
    r"|事|タスク|質問|ステップ|項目|問題|理由|選択肢"
)
# Four alternatives, in order:
#   1. bounded parallel vocabulary (space-delimited scripts)
#   2. CJK parallel vocabulary (bare literals)
#   3. count word + whitespace + plural noun (languages may be mixed)
#   4. a list item numbered 2-9 at the start of the text or of a line
# Every bounded alternative starts with a letter or digit, so the leading
# \b is reliable; the trailing (?!\w) equals \b after a letter and also
# succeeds after a combining mark followed by whitespace/punctuation.
SPAWN_CUE_RE = re.compile(
    rf"\b(?:{_PARALLEL_WORDS_BOUNDED})(?!\w)"
    rf"|(?:{_PARALLEL_WORDS_CJK})"  # CJK: bare, no boundary
    rf"|\b(?:{_COUNT_WORDS})\s+(?:{_PLURAL_NOUNS})(?!\w)"
    rf"|(?:^|\n)\s*[2-9][\.\)\:]\s+",
    re.IGNORECASE | re.UNICODE,
)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
134
|
+
# Want / rule statements (extract.py WANT_RE)
|
|
135
|
+
# User stating a class-level want, never/always rule, normative phrasing.
|
|
136
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
137
|
+
|
|
138
|
+
# Per-language "want / rule" fragments. Contractions accept both the ASCII
# apostrophe and the typographic one (U+2019), which many editors and
# mobile keyboards insert automatically ("don’t", "j’ai").
_WANT_EN = (
    r"\b(?:i\s+want\s+(?:you\s+)?to|i\s+need\s+(?:you\s+)?to|"
    r"you\s+(?:must|should|shouldn['\u2019]?t|must\s+not)|"
    r"don['\u2019]?t\s+(?:ever\s+)?|never\s+|always\s+|"
    r"from\s+now\s+on|going\s+forward)\b"
)
_WANT_ES = (
    r"\b(?:quiero\s+que|necesito\s+que|debes(?:\s+no)?|"
    r"no\s+debes|nunca\s+|siempre\s+|a\s+partir\s+de\s+ahora)\b"
)
_WANT_PT = (
    r"\b(?:eu\s+quero\s+que|quero\s+que|eu\s+preciso\s+que|"
    r"voc[êe]\s+(?:deve|n[ãa]o\s+deve|precisa)|"
    r"nunca\s+|sempre\s+|a\s+partir\s+de\s+agora|de\s+agora\s+em\s+diante)\b"
)
_WANT_FR = (
    r"\b(?:je\s+veux\s+que|j['\u2019]?ai\s+besoin\s+(?:de|que)|"
    r"tu\s+dois(?:\s+pas)?|ne\s+pas\s+|jamais\s+|toujours\s+|"
    r"[àa]\s+partir\s+de\s+maintenant)\b"
)
_WANT_DE = (
    r"\b(?:ich\s+m[öo]chte,?\s+dass\s+du|ich\s+will,?\s+dass\s+du|"
    r"du\s+(?:sollst|darfst\s+nicht|musst)|nie(?:mals)?\s+|immer\s+|"
    r"ab\s+(?:jetzt|sofort)|von\s+jetzt\s+an)\b"
)
_WANT_RU = (
    r"\b(?:я\s+хочу\s+чтоб[ыь]?|хочу\s+чтоб[ыь]?|нужно\s+чтоб[ыь]?|"
    r"надо\s+чтоб[ыь]?|должен\s+быть|не\s+должен|должно\s+быть|"
    r"пусть\s+\S+\s+не|давай\s+чтоб[ыь]?|чтобы\s+ты\s+(?:не\s+)?)"
)
# Hindi and Arabic keep only a leading \b: several alternatives end in a
# combining mark, after which a trailing \b would not hold.
_WANT_HI = r"\b(?:मैं\s+चाहता\s+हूँ|मुझे\s+चाहिए|आपको\s+करना\s+चाहिए|मत\s+करो|हमेशा\s+|कभी\s+नहीं)"
_WANT_AR = r"\b(?:أريدك\s+أن|تحتاج\s+إلى|يجب\s+(?:أن|ألا)|لا\s+تفعل\s+أبد[اً]?|دائم[اً]?|من\s+الآن)"
# Mandarin and Japanese use literal CJK with no boundary anchor.
_WANT_ZH = r"(?:我想要|我需要你|你应该|你不应该|你必须|不要再|永远不要|总是|从现在开始)"
_WANT_JA = (
    r"(?:してほしい|してください|する必要がある|"
    r"しなければならない|してはいけない|絶対に|いつも|今後|これから)"
)

# One combined, case-insensitive pattern; each language fragment is wrapped
# in its own non-capturing group so the alternations cannot bleed together.
WANT_RE = re.compile(
    f"(?:{_WANT_EN})|(?:{_WANT_ES})|(?:{_WANT_PT})|(?:{_WANT_FR})|"
    f"(?:{_WANT_DE})|(?:{_WANT_RU})|(?:{_WANT_HI})|(?:{_WANT_AR})|"
    f"(?:{_WANT_ZH})|(?:{_WANT_JA})",
    re.IGNORECASE | re.UNICODE,
)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
186
|
+
# Conclusion / takeaway markers (extract.py INSIGHT_MARKERS_RE)
|
|
187
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
# Per-language conclusion / takeaway fragments. Space-delimited scripts
# carry a leading \b only; the CJK, Hindi and Arabic fragments are bare.
_INSIGHT_EN = (
    r"\b(?:this\s+is\s+the|takeaway(?:\s|:)|"
    r"key\s+(?:point|insight)|conclusion(?:\s|:)|"
    r"the\s+bottom\s+line|the\s+gist\s+is)"
)
_INSIGHT_ES = (
    r"\b(?:la\s+conclusi[óo]n|el\s+punto\s+clave|en\s+resumen|"
    r"lo\s+importante\s+es|en\s+definitiva)"
)
_INSIGHT_PT = (
    r"\b(?:a\s+conclus[ãa]o|o\s+ponto\s+(?:chave|principal)|"
    r"em\s+resumo|o\s+importante\s+[ée]|no\s+fim\s+das\s+contas)"
)
# The elision apostrophe may be ASCII (') or typographic (U+2019), or
# missing altogether.
_INSIGHT_FR = (
    r"\b(?:la\s+conclusion|le\s+point\s+cl[ée]|en\s+r[ée]sum[ée]|"
    r"l['\u2019]?essentiel(?:\s+est)?|au\s+final)"
)
_INSIGHT_DE = (
    r"\b(?:die\s+schlussfolgerung|der\s+hauptpunkt|kurz\s+gesagt|"
    r"das\s+wichtige\s+ist|im\s+endeffekt|zusammengefasst)"
)
_INSIGHT_RU = (
    r"\b(?:это\s+и\s+есть|ключев(?:ое|ая|ой)|вывод(?:\s|:)|"
    r"главное\s+—|итог(?:\s|:)|суть\s+в\s+том)"
)
_INSIGHT_HI = r"(?:मुख्य\s+बात|निष्कर्ष|खास\s+बात|सार\s+यह)"
_INSIGHT_AR = r"(?:الخلاصة|النقطة\s+الأساسية|الأهم|باختصار)"
_INSIGHT_ZH = r"(?:关键是|结论是|重点是|总的来说|总结一下)"
_INSIGHT_JA = r"(?:結論は|要するに|重要なのは|ポイントは|要点は|まとめると)"

# Combined, case-insensitive pattern over all language fragments.
INSIGHT_MARKERS_RE = re.compile(
    f"(?:{_INSIGHT_EN})|(?:{_INSIGHT_ES})|(?:{_INSIGHT_PT})|"
    f"(?:{_INSIGHT_FR})|(?:{_INSIGHT_DE})|(?:{_INSIGHT_RU})|"
    f"(?:{_INSIGHT_HI})|(?:{_INSIGHT_AR})|"
    f"(?:{_INSIGHT_ZH})|(?:{_INSIGHT_JA})",
    re.IGNORECASE | re.UNICODE,
)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
229
|
+
# "For example" markers (extract.py EXAMPLE_RE)
|
|
230
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
231
|
+
|
|
232
|
+
# Per-language "for example" fragments.
_EXAMPLE_EN = r"\b(?:for\s+example|e\.?g\.?|such\s+as|like\s+when)\b"
_EXAMPLE_ES = r"\bpor\s+ejemplo\b"
_EXAMPLE_PT = r"\bpor\s+exemplo\b"
_EXAMPLE_FR = r"\bpar\s+exemple\b"
_EXAMPLE_DE = r"\b(?:zum\s+beispiel|z\.\s?b\.?)\b"
# The full word is required: an optional final letter would also accept
# the truncated non-word "наприме".
_EXAMPLE_RU = r"\bнапример\b"
_EXAMPLE_HI = r"(?:उदाहरण\s+के\s+लिए|जैसे\s+कि)"
_EXAMPLE_AR = r"(?:على\s+سبيل\s+المثال|مثل)"
_EXAMPLE_ZH = r"(?:例如|比如|举例来说)"
_EXAMPLE_JA = r"(?:例えば|たとえば|例として)"

# Combined, case-insensitive pattern over all language fragments.
EXAMPLE_RE = re.compile(
    f"(?:{_EXAMPLE_EN})|(?:{_EXAMPLE_ES})|(?:{_EXAMPLE_PT})|"
    f"(?:{_EXAMPLE_FR})|(?:{_EXAMPLE_DE})|(?:{_EXAMPLE_RU})|"
    f"(?:{_EXAMPLE_HI})|(?:{_EXAMPLE_AR})|"
    f"(?:{_EXAMPLE_ZH})|(?:{_EXAMPLE_JA})",
    re.IGNORECASE | re.UNICODE,
)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
253
|
+
# Pattern / regularity framing (extract.py FRAME_RE)
|
|
254
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
# Per-language pattern / regularity framing fragments.
_FRAME_EN = (
    r"\b(?:pattern(?:s|:)|regularly|typically|usually|"
    r"in\s+such\s+cases|whenever\s+\S+\s+then)"
)
_FRAME_ES = (
    r"\b(?:patr[óo]n|t[íi]picamente|normalmente|generalmente|"
    r"en\s+(?:estos|tales)\s+casos|cuando\s+\S+\s+entonces)"
)
_FRAME_PT = (
    r"\b(?:padr[ãa]o|tipicamente|normalmente|geralmente|"
    r"nesses\s+casos|sempre\s+que)"
)
_FRAME_FR = (
    r"\b(?:motif|typiquement|normalement|g[ée]n[ée]ralement|"
    r"dans\s+(?:ces|tels)\s+cas|chaque\s+fois\s+que)"
)
_FRAME_DE = (
    r"\b(?:muster|typischerweise|normalerweise|[üu]blicherweise|"
    r"in\s+solchen\s+f[äa]llen|immer\s+wenn)"
)
_FRAME_RU = (
    r"\b(?:паттерн|регулярн|обычно|типичн|часто\s+бывает|"
    r"в\s+таких\s+случаях|когда\s+\S+\s+—)"
)
_FRAME_HI = r"(?:पैटर्न|आमतौर\s+पर|ऐसे\s+मामलों\s+में|जब\s+भी)"
_FRAME_AR = r"(?:عادة|في\s+مثل\s+هذه\s+الحالات|كلما)"
_FRAME_ZH = r"(?:通常|一般来说|在这种情况下|每当)"
# "するたびに" ("every time ... does") is matched without the textbook
# placeholder wave dash: real text puts a verb stem there, so a literal
# "〜" prefix would never occur in user input.
_FRAME_JA = r"(?:パターン|通常|一般的に|通例|このような場合|するたびに)"

# Combined, case-insensitive pattern over all language fragments.
FRAME_RE = re.compile(
    f"(?:{_FRAME_EN})|(?:{_FRAME_ES})|(?:{_FRAME_PT})|"
    f"(?:{_FRAME_FR})|(?:{_FRAME_DE})|(?:{_FRAME_RU})|"
    f"(?:{_FRAME_HI})|(?:{_FRAME_AR})|"
    f"(?:{_FRAME_ZH})|(?:{_FRAME_JA})",
    re.IGNORECASE | re.UNICODE,
)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
295
|
+
# Prompt-embedded examples (shadow_review.py / tools/spawn.py)
|
|
296
|
+
# Multilingual examples shown inline to spawned children so they
|
|
297
|
+
# recognize class-level signals regardless of the user's language.
|
|
298
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
299
|
+
|
|
300
|
+
# Three bullet lines, one per signal family; each bullet lists the same
# phrase in every supported language, joined by " / " and newline-terminated.
SHADOW_CLASS_SIGNAL_EXAMPLES = "".join(
    "- " + " / ".join(variants) + "\n"
    for variants in (
        # "always do X for this class of task" (language-tagged)
        (
            '"in this kind of task always X" (en)',
            '"в таких задачах X" (ru)',
            '"en este tipo de tarea siempre X" (es)',
            '"neste tipo de tarefa sempre X" (pt)',
            '"dans ce genre de tâche toujours X" (fr)',
            '"bei dieser Art von Aufgabe immer X" (de)',
            '"इस तरह के काम में हमेशा X" (hi)',
            '"في هذا النوع من المهام دائماً X" (ar)',
            '"在这种任务中总是 X" (zh)',
            '"このような作業ではいつも X" (ja)',
        ),
        # "stop doing Y"
        (
            '"stop doing Y"',
            '"не делай Y"',
            '"deja de hacer Y"',
            '"pare de fazer Y"',
            '"arrête de faire Y"',
            '"hör auf, Y zu tun"',
            '"Y करना बंद करो"',
            '"توقف عن فعل Y"',
            '"不要再做 Y"',
            '"Y をするのをやめて"',
        ),
        # "we got burned by Z"
        (
            '"we got burned by Z last time"',
            '"обожглись на Z"',
            '"nos quemamos con Z"',
            '"nos queimamos com Z"',
            '"on s\'est brûlés sur Z"',
            '"wir sind mit Z auf die Nase gefallen"',
            '"पिछली बार Z से नुकसान हुआ"',
            '"تضررنا من Z"',
            '"上次被 Z 坑了"',
            '"前回 Z で痛い目にあった"',
        ),
    )
)
|
|
321
|
+
|
|
322
|
+
# One line of double-quoted trigger phrases separated by " / ", with no
# trailing separator or newline.
SPAWN_TRIGGER_PHRASE_EXAMPLES = " / ".join(
    f'"{phrase}"'
    for phrase in (
        # English
        "while you do X", "in parallel", "this is going to take a while",
        # Russian
        "и ещё", "параллельно", "пока ты", "заодно",
        # Spanish
        "en paralelo", "mientras",
        # Portuguese
        "em paralelo", "enquanto",
        # French
        "en parallèle", "pendant que",
        # German
        "parallel", "gleichzeitig",
        # Hindi
        "एक साथ", "बीच में",
        # Arabic
        "بالتوازي", "في نفس الوقت",
        # Mandarin
        "同时", "并行",
        # Japanese
        "同時に", "並行で",
    )
)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
337
|
+
# Supported-locales registry — handy for diagnostics and tests.
|
|
338
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
339
|
+
|
|
340
|
+
# Two-letter codes of every locale that has vocabulary in this module.
SUPPORTED_LOCALES: tuple[str, ...] = tuple(
    "en zh hi es pt fr de ar ru ja".split()
)
|
threadkeeper/identity.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Per-process session bookkeeping and self-conversation-id detection.
|
|
2
|
+
One MCP server process = one session row + a cached cid for whoami()."""
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import sqlite3
|
|
8
|
+
import secrets
|
|
9
|
+
import subprocess
|
|
10
|
+
import time
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from .config import CLIENT_LABEL, SELF_CID_TTL_S, CLAUDE_PROJECTS_DIR, WRITE_ORIGIN
|
|
14
|
+
|
|
15
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
16
|
+
# Session tracking. One MCP server process = one Claude Desktop window.
|
|
17
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
18
|
+
# Label written into the `client` column of session/presence rows.
_client_label = CLIENT_LABEL
# Session identity for this process; both stay None until
# _ensure_session() creates the session.
_session_id: Optional[str] = None
_session_start: Optional[int] = None

# Cached conversation id (the jsonl stem, e.g. "570fe39e-…") for this
# window. _detect_self_cid() resolves it as env override → ppid walk →
# latest-mtime heuristic and stores the winning path in _self_cid_via so
# whoami() can report it.
_self_cid_ttl_s: float = SELF_CID_TTL_S  # lifetime of an 'mtime' result
_self_cid: Optional[str] = None
_self_cid_via: Optional[str] = None  # 'forced' | 'ppid' | 'mtime' | None
_self_cid_at: float = 0.0  # time.time() when _self_cid was last resolved
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _ensure_cursor(conn: sqlite3.Connection) -> None:
    """Create this session's event cursor if it does not have one yet.

    The new cursor is anchored at the current highest event id (0 when the
    events table is empty), so a fresh session starts reading from "now"
    instead of replaying old history. Does nothing when no session exists
    or when the cursor row is already present. The caller commits.
    """
    sid = _session_id
    if sid is None:
        return
    existing = conn.execute(
        "SELECT 1 FROM cursors WHERE session_id=?", (sid,)
    ).fetchone()
    if existing:
        return
    newest = conn.execute("SELECT COALESCE(MAX(id), 0) FROM events").fetchone()
    conn.execute(
        "INSERT INTO cursors (session_id, last_event_id, updated_at) VALUES (?,?,?)",
        (sid, newest[0], int(time.time())),
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _emit(conn: sqlite3.Connection, kind: str,
          target: Optional[str] = None, summary: Optional[str] = None) -> None:
    """Record one event for the current session and refresh its presence row.

    Args:
        conn: open database connection; the caller commits.
        kind: event kind, also stored as the presence row's last_action.
        target: optional target; when given it also becomes the presence
            row's current_thread, otherwise the stored value is kept.
        summary: optional free text, stored truncated to 200 characters
            (empty string when missing).

    Does nothing when no session has been created yet.
    """
    sid = _session_id
    if sid is None:
        return
    stamp = int(time.time())
    clipped = summary[:200] if summary else ""
    event_sql = (
        "INSERT INTO events (session_id, kind, target, summary, created_at) "
        "VALUES (?,?,?,?,?)"
    )
    # Upsert: the first event of a session creates the presence row, later
    # ones only bump the heartbeat / thread / last action.
    presence_sql = (
        "INSERT INTO presence (session_id, client, started_at, heartbeat_at, "
        "current_thread, last_action) VALUES (?,?,?,?,?,?) "
        "ON CONFLICT(session_id) DO UPDATE SET "
        " heartbeat_at=excluded.heartbeat_at, "
        " current_thread=COALESCE(excluded.current_thread, presence.current_thread), "
        " last_action=excluded.last_action"
    )
    conn.execute(event_sql, (sid, kind, target, clipped, stamp))
    conn.execute(
        presence_sql,
        (sid, _client_label, _session_start or stamp, stamp, target, kind),
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _heartbeat(conn: sqlite3.Connection) -> None:
    """Refresh heartbeat_at on this session's presence row.

    Unlike _emit() this writes no event, which suits read-only tool calls.
    Does nothing when no session exists yet. The caller commits.
    """
    sid = _session_id
    if sid is None:
        return
    beat = int(time.time())
    conn.execute(
        "UPDATE presence SET heartbeat_at=? WHERE session_id=?",
        (beat, sid),
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _ensure_session(conn: sqlite3.Connection, client: Optional[str] = None) -> str:
    """Return this process's session id, creating the session on first call.

    On the first call this generates the id, inserts the `sessions` and
    `presence` rows, anchors the event cursor, commits, and then runs the
    best-effort startup hooks (initial ingest, FTS backfill, background
    daemons). Later calls return the cached id and ignore `client`.

    Args:
        conn: open database connection.
        client: client label for the new rows; defaults to the module's
            configured label.

    Raises:
        Whatever the database raises while the session rows are written.
        In that case the module-level session state is reset, so the next
        call retries instead of reporting a session that has no rows.
    """
    global _session_id, _session_start
    if _session_id is not None:
        return _session_id

    # Local import: keeps this block independent of the file's import list.
    import logging

    # pid embedded so two processes can never collide; hex tail keeps id short.
    # NB: claude desktop/code may multiplex several windows into one mcp
    # server process — in that case all of them share this _session_id, and
    # session-id-as-client-identity is a known false signal here.
    _session_id = f"s_{os.getpid()}_{secrets.token_hex(2)}"
    _session_start = int(time.time())
    cli = client or _client_label
    try:
        conn.execute(
            "INSERT INTO sessions (id, started_at, client, write_origin) "
            "VALUES (?,?,?,?)",
            (_session_id, _session_start, cli, WRITE_ORIGIN),
        )
        conn.execute(
            "INSERT INTO presence (session_id, client, started_at, heartbeat_at, "
            "last_action) VALUES (?,?,?,?,?)",
            (_session_id, cli, _session_start, _session_start, "session_start"),
        )
        # _ensure_cursor reads the module-level _session_id, which is why the
        # globals are assigned before the writes and undone on failure.
        _ensure_cursor(conn)
        conn.commit()
    except BaseException:
        _session_id = None
        _session_start = None
        raise

    # Startup hooks. Imports stay lazy (inside each hook) to avoid circular
    # module dependencies, and so that one missing/broken module only
    # disables its own hook.
    def _ingest_backlog() -> None:
        from . import ingest
        ingest._ingest_all(conn, max_msgs=ingest.INGEST_CAP_PER_CALL)

    def _backfill_dialog_fts() -> None:
        from . import ingest
        ingest._backfill_dialog_fts_if_empty(conn)

    def _background_ingester() -> None:
        from . import ingest
        ingest._start_background_ingester()

    def _search_proxy() -> None:
        from . import search_proxy
        search_proxy.start_search_proxy()

    def _budget_daemon() -> None:
        from . import spawn_budget
        spawn_budget.start_budget_daemon()

    def _skill_watcher() -> None:
        from . import skill_watcher
        skill_watcher.start_skill_watcher()

    def _shadow_daemon() -> None:
        from . import shadow_review
        shadow_review.start_shadow_daemon()

    def _curator_daemon() -> None:
        from . import curator
        curator.start_curator_daemon()

    def _extract_daemon() -> None:
        from . import extract_daemon
        extract_daemon.start_extract_daemon()

    log = logging.getLogger(__name__)
    for hook in (
        _ingest_backlog,
        _backfill_dialog_fts,
        _background_ingester,
        _search_proxy,
        _budget_daemon,
        _skill_watcher,
        _shadow_daemon,
        _curator_daemon,
        _extract_daemon,
    ):
        try:
            hook()
        except Exception:
            # Never block session start on an optional subsystem; leave a
            # debug-level trace so failures are diagnosable.
            log.debug("session startup hook %s failed", hook.__name__,
                      exc_info=True)
    return _session_id
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Extracts the conversation uuid from a CLI command line. Accepts both the
# space-separated form (`--resume <uuid>`) and the `--resume=<uuid>` form;
# group 1 is the uuid (lowercase hex, 8-4-4-4-12).
_UUID_RE = re.compile(
    r"--(?:resume|session-id|continue)(?:\s+|=)"
    r"([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})"
)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _resolve_self_cid_via_ppid() -> Optional[str]:
    """Find this conversation's uuid on an ancestor's command line.

    Starting at the current process, asks `ps` for each process's parent
    pid and command line and walks upward until a command matches
    _UUID_RE (a --resume/--session-id/--continue flag followed by a uuid).

    Returns:
        The uuid, or None when `ps` is unavailable, fails or times out, its
        output cannot be parsed, the walk reaches pid <= 1, or 12 processes
        were inspected without a match.
    """
    pid = os.getpid()
    for _ in range(12):
        try:
            proc = subprocess.run(
                # -ww: never truncate the command column; without it some
                # `ps` implementations clip to a terminal/COLUMNS width and
                # can cut the uuid off.
                ["ps", "-ww", "-p", str(pid), "-o", "ppid=,command="],
                capture_output=True, text=True, timeout=2,
                # Undecodable bytes in a command line must not raise.
                errors="replace",
            )
        except (subprocess.SubprocessError, OSError):
            return None
        # First column is ppid, the rest is the command.
        fields = (proc.stdout or "").strip().split(None, 1)
        if len(fields) < 2:
            return None
        try:
            ppid = int(fields[0])
        except ValueError:
            return None
        found = _UUID_RE.search(fields[1])
        if found:
            return found.group(1)
        if ppid <= 1:
            return None
        pid = ppid
    return None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _detect_self_cid() -> Optional[str]:
    """Identify THIS conversation's id (jsonl stem). Resolution order:

    1. env THREADKEEPER_FORCE_CID, returned as-is on every call
    2. a cached result: a 'ppid' result is kept for the lifetime of the
       process, an 'mtime' result for _self_cid_ttl_s seconds
    3. ppid walk for a command line carrying --resume/--session-id <uuid>
    4. fallback heuristic: stem of the most recently modified *.jsonl
       under CLAUDE_PROJECTS_DIR (can flap when siblings are equally active)

    The cache is consulted before the ppid walk because the walk spawns up
    to 12 `ps` subprocesses; running it first would make the TTL cache
    pointless whenever the walk finds nothing.

    Returns:
        The conversation id, or None when nothing could be resolved.
        _self_cid_via records which path produced the answer.
    """
    global _self_cid, _self_cid_at, _self_cid_via
    forced = os.environ.get("THREADKEEPER_FORCE_CID")
    if forced:
        _self_cid_via = "forced"
        return forced

    now_t = time.time()
    if _self_cid:
        # ppid resolution: cache forever once found (process identity is stable).
        if _self_cid_via == "ppid":
            return _self_cid
        # Heuristic result: trust it only for a short ttl.
        if _self_cid_via == "mtime" and now_t - _self_cid_at < _self_cid_ttl_s:
            return _self_cid

    cid = _resolve_self_cid_via_ppid()
    if cid:
        _self_cid = cid
        _self_cid_via = "ppid"
        _self_cid_at = time.time()
        return cid

    if not CLAUDE_PROJECTS_DIR.exists():
        return None
    latest_p = None
    latest_m: float = 0.0
    for p in CLAUDE_PROJECTS_DIR.glob("**/*.jsonl"):
        try:
            m = p.stat().st_mtime
        except OSError:
            continue  # file vanished or is unreadable; skip it
        if m > latest_m:
            latest_m = m
            latest_p = p
    if latest_p is None:
        return None
    _self_cid = latest_p.stem
    _self_cid_via = "mtime"
    _self_cid_at = now_t
    return _self_cid
|