tokenknows-mcp 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_server/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """tokenknows-mcp · v2.0 · 把 TokenKnows 包成 MCP server 接入 Claude Code/Cowork."""
2
+
3
+ from mcp_server.server import mcp
4
+
5
+ __all__ = ["mcp"]
mcp_server/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry: python -m mcp_server."""
2
+
3
+ from mcp_server.server import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
mcp_server/client.py ADDED
@@ -0,0 +1,86 @@
1
+ """TokenKnows backend HTTP client · MCP server 内部用.
2
+
3
+ 设计原则:
4
+ - 同进程跑可直接 import backend service (zero-network); 跨进程跑走 HTTP
5
+ - 默认 HTTP (8001) 让 plugin 可独立于 backend 部署
6
+ - timeout 60s (DEFAULT_TIMEOUT); backend pipeline 长 (LLM call) 30-60s, 用户用 distill 命令时
7
+ 显式说"约 1 分钟", 不阻塞 MCP request 默认超时
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ from typing import Any
14
+
15
+ import httpx
16
+
17
+
18
DEFAULT_TIMEOUT = 60.0


class TokenKnowsClient:
    """Lightweight async HTTP wrapper around the tokenknows-api backend.

    Usage:
        client = TokenKnowsClient()
        asset = await client.post('/api/v1/projects/p1/assets/generate',
                                  json={'type': 'weekly_report'})
    """

    def __init__(
        self,
        base_url: str | None = None,
        auth_token: str | None = None,
        timeout: float = DEFAULT_TIMEOUT,
    ) -> None:
        """Configure the backend location, credentials and timeout.

        Args:
            base_url: Backend root URL. Falls back to the
                TOKENKNOWS_API_BASE environment variable, then to
                http://127.0.0.1:8001. Trailing slashes are stripped.
            auth_token: Bearer token. Falls back to TOKENKNOWS_API_TOKEN;
                when neither is set no Authorization header is sent.
            timeout: Timeout in seconds handed to every httpx client.
        """
        self.base_url = (
            base_url
            or os.getenv("TOKENKNOWS_API_BASE")
            or "http://127.0.0.1:8001"
        ).rstrip("/")
        self.auth_token = auth_token or os.getenv("TOKENKNOWS_API_TOKEN")
        self.timeout = timeout
        self._headers: dict[str, str] = {"Content-Type": "application/json"}
        if self.auth_token:
            self._headers["Authorization"] = f"Bearer {self.auth_token}"

    async def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send one request and return the decoded JSON body.

        A fresh httpx.AsyncClient is opened per call, as the original
        get/post/patch methods did.

        Returns:
            The parsed JSON payload, or None when the response body is
            empty (for example 204 No Content), where r.json() would raise.

        Raises:
            httpx.HTTPStatusError: for 4xx/5xx responses.
        """
        async with httpx.AsyncClient(timeout=self.timeout) as cli:
            r = await cli.request(
                method, f"{self.base_url}{path}", headers=self._headers, **kwargs,
            )
            r.raise_for_status()
            if not r.content:
                return None
            return r.json()

    async def get(self, path: str, params: dict | None = None) -> Any:
        """GET base_url + path with optional query params."""
        return await self._request("GET", path, params=params)

    async def post(self, path: str, json: dict | None = None) -> Any:
        """POST a JSON body ({} when json is None or empty)."""
        return await self._request("POST", path, json=json or {})

    async def patch(self, path: str, json: dict | None = None) -> Any:
        """PATCH with a JSON body ({} when json is None or empty)."""
        return await self._request("PATCH", path, json=json or {})
70
+
71
+
72
_default_client: TokenKnowsClient | None = None


def get_client() -> TokenKnowsClient:
    """Return the shared client, building it on first use.

    Tests can swap the instance via set_client() or by monkeypatching.
    """
    global _default_client
    shared = _default_client
    if shared is None:
        shared = _default_client = TokenKnowsClient()
    return shared


def set_client(client: TokenKnowsClient) -> None:
    """Install client as the shared instance (test injection hook)."""
    global _default_client
    _default_client = client
mcp_server/daemon.py ADDED
@@ -0,0 +1,333 @@
1
+ """v2.0 T118 · session-watcher daemon.
2
+
3
+ 后台监听 ~/.claude/projects/*/*.jsonl, 增量解析新 line → 上报 events
4
+ 到 tokenknows-api backend. 配合 MCP server 形成"会话即素材"的双轨:
5
+
6
+ - 用户主动 /tokenknows:weekly → MCP 同步蒸馏 (T117)
7
+ - 后台 daemon 持续累积 events → 等用户随时蒸馏 (T118)
8
+
9
+ 启动:
10
+ python -m mcp_server.daemon # 默认 poll 30s
11
+ python -m mcp_server.daemon --interval 60 # 自定义
12
+ python -m mcp_server.daemon --once # 只跑一次 (cron 模式)
13
+
14
+ State 文件: ~/.tokenknows-watcher.json
15
+ { "files": { "<jsonl_path>": { "offset": <byte_offset>, "session_id": "..." } } }
16
+
17
+ dedup: external_id = f"{session_id}-{msg_uuid}" (msg_uuid = message.id / record.uuid, 兜底 f"line-{line_no}"), backend 按 content_hash 去重.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import asyncio
24
+ import hashlib
25
+ import json
26
+ import logging
27
+ import os
28
+ import re
29
+ import signal
30
+ import sys
31
+ import time
32
+ from pathlib import Path
33
+ from typing import Any
34
+
35
+ from mcp_server.client import TokenKnowsClient
36
+
37
+
38
+ logger = logging.getLogger("tokenknows-watcher")
39
+
40
+ DEFAULT_PROJECTS_DIR = Path.home() / ".claude" / "projects"
41
+ DEFAULT_STATE_FILE = Path.home() / ".tokenknows-watcher.json"
42
+ DEFAULT_POLL_INTERVAL = 30
43
+ DEFAULT_BATCH_SIZE = 50
44
+
45
+ # 仅处理这两种 type (其它如 attachment/queue-operation/system 是 noise)
46
+ _VALID_TYPES = {"user", "assistant"}
47
+
48
+
49
def _load_state(path: Path) -> dict[str, Any]:
    """Load the watcher state stored at path.

    Args:
        path: Location of the JSON state file.

    Returns:
        The parsed state dict. A fresh ``{"files": {}}`` is returned when
        the file is missing, unreadable, not valid JSON, or is valid JSON
        whose shape is not an object with a dict under "files" — the
        caller indexes into that mapping and would otherwise crash.
    """
    try:
        # EAFP: avoids the exists()/read race of the previous version.
        state = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {"files": {}}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("state file corrupt, resetting: %s", e)
        return {"files": {}}

    if not isinstance(state, dict) or not isinstance(state.get("files", {}), dict):
        logger.warning("state file has unexpected shape, resetting")
        return {"files": {}}
    return state
57
+
58
+
59
def _save_state(path: Path, state: dict) -> None:
    """Persist state as indented JSON at path, replacing it atomically.

    The JSON is written to a sibling temporary file first and then moved
    over the target, so an interruption mid-write cannot leave a truncated
    state file behind.

    Args:
        path: Destination file; missing parent directories are created.
        state: JSON-serializable watcher state.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(f"{path.name}.tmp")
    tmp.write_text(json.dumps(state, indent=2), encoding="utf-8")
    tmp.replace(path)
62
+
63
+
64
def _extract_text(message: dict) -> str:
    """Flatten message["content"] into plain text.

    Args:
        message: A message dict whose "content" is a str or a list of
            content blocks.

    Returns:
        - str content: returned unchanged.
        - missing or None content: "".
        - list content: one line per block, empty lines dropped:
            * "text" block        -> its text
            * "tool_use" block    -> "[tool_use: <name>]"
            * "tool_result" block -> "[tool_result: <first 200 chars>]"
            * other dict blocks   -> skipped
            * non-dict items      -> str(item)
        - anything else: str(content).
    """
    content = message.get("content", "")
    if content is None:
        # Explicit null used to come back as the literal string "None".
        return ""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    parts: list[str] = []
    for blk in content:
        if not isinstance(blk, dict):
            parts.append(str(blk))
            continue
        kind = blk.get("type")
        if kind == "text":
            parts.append(str(blk.get("text") or ""))
        elif kind == "tool_use":
            # Tool input is not included in the text, only the tool name.
            parts.append(f"[tool_use: {blk.get('name', '?')}]")
        elif kind == "tool_result":
            res = blk.get("content", "")
            if isinstance(res, list):
                # `or ""` keeps a null text from breaking the join.
                res = "".join(
                    str(b.get("text") or "") if isinstance(b, dict) else str(b)
                    for b in res
                )
            parts.append(f"[tool_result: {str(res)[:200]}]")
    return "\n".join(p for p in parts if p)
93
+
94
+
95
def _build_event(
    record: dict, session_id: str, line_no: int,
) -> dict[str, Any] | None:
    """Build an event payload dict from one parsed jsonl record.

    Args:
        record: One decoded jsonl line.
        session_id: Session the record belongs to; used as source_ref and
            as the prefix of external_id.
        line_no: Fallback identity, used only when the record carries
            neither message["id"] nor "uuid".

    Returns:
        The event dict, or None when the record is skipped: its type is
        not in _VALID_TYPES, its "message" is not a dict, or the extracted
        text is shorter than 5 characters after stripping.
    """
    rec_type = record.get("type")
    if not isinstance(rec_type, str) or rec_type not in _VALID_TYPES:
        return None

    message = record.get("message")
    if not isinstance(message, dict):
        # A null/str message used to raise AttributeError and abort the scan.
        return None

    text = _extract_text(message)
    if not text or len(text.strip()) < 5:  # too short to be meaningful
        return None

    role = message.get("role", rec_type)
    timestamp = record.get("timestamp") or message.get("created_at")
    # Prefer the message id, then the record uuid, then the line fallback.
    msg_uuid = message.get("id") or record.get("uuid") or f"line-{line_no}"
    external_id = f"{session_id}-{msg_uuid}"

    title = text.strip().splitlines()[0][:60]
    return {
        "source_type": "claude_code",
        "source_ref": session_id,
        "external_id": external_id,
        "event_type": "ai_conversation_turn",
        "occurred_at": timestamp,
        "author": {"name": "user" if role == "user" else "Claude"},
        "title": title,
        # Content is truncated to 4000 chars; the hash covers the full text.
        "content": text[:4000],
        "content_hash": hashlib.sha256(text.encode("utf-8")).hexdigest(),
        "tags": ["claude-code-session", role],
        "trust_score": 0.8 if role == "user" else 0.6,
    }
128
+
129
+
130
def _scan_files(projects_dir: Path) -> list[Path]:
    """List every ``*.jsonl`` file one level below projects_dir, sorted.

    Only files directly inside an immediate subdirectory are returned;
    a missing projects_dir yields an empty list.
    """
    if not projects_dir.exists():
        return []
    return sorted(
        session_file
        for project in projects_dir.iterdir()
        if project.is_dir()
        for session_file in project.glob("*.jsonl")
    )
141
+
142
+
143
def _session_id_from_path(p: Path) -> str:
    """Return the session id: the file name with its final suffix removed."""
    session_id = p.stem
    return session_id
146
+
147
+
148
async def _flush_batch(
    client: TokenKnowsClient, project_id: str, events: list[dict],
    *, raise_on_error: bool = False,
) -> tuple[int, int]:
    """Upload a batch of events to the backend.

    Args:
        client: Backend client used for the POST.
        project_id: Project receiving the events.
        events: Event payload dicts; an empty list makes no request.
        raise_on_error: When True, upload errors propagate so the caller
            can tell a failed upload from one that ingested nothing. The
            default keeps the historical behaviour: log and return (0, 0).

    Returns:
        (ingested, skipped) as reported by the backend response.
    """
    if not events:
        return 0, 0
    try:
        resp = await client.post(
            f"/api/v1/projects/{project_id}/events",
            json={"events": events},
        )
        return resp.get("ingested", 0), resp.get("skipped", 0)
    except Exception as e:  # noqa: BLE001
        if raise_on_error:
            raise
        logger.warning("ingest failed (will retry next tick): %s", e)
        return 0, 0


def _split_complete_lines(blob: bytes) -> list[bytes]:
    """Return the newline-terminated lines of blob, terminator included.

    The split is on the byte b"\\n" only. str.splitlines() also breaks on
    characters such as U+2028/U+2029, which may appear unescaped inside a
    JSON string and would cut a record in half. Bytes after the last
    newline (a line still being written) are left out.
    """
    complete, newline, _partial = blob.rpartition(b"\n")
    if not newline:
        return []
    return [line + b"\n" for line in complete.split(b"\n")]


async def _flush_pending(
    client: TokenKnowsClient, project_id: str, batch: list[dict],
    totals: dict[str, int],
) -> bool:
    """Upload batch, add the counts to totals and clear it; False on failure."""
    try:
        ingested, skipped = await _flush_batch(
            client, project_id, batch, raise_on_error=True,
        )
    except Exception as e:  # noqa: BLE001
        logger.warning("ingest failed (will retry next tick): %s", e)
        return False
    totals["ingested"] += ingested
    totals["skipped"] += skipped
    batch.clear()
    return True


async def _scan_once(
    client: TokenKnowsClient,
    project_id: str,
    projects_dir: Path,
    state_file: Path,
    batch_size: int,
) -> dict[str, int]:
    """Run one scan pass over every session file and upload new events.

    For each jsonl file the bytes after the saved offset are read, split
    into complete lines, converted with _build_event and uploaded in
    batches of batch_size. The saved offset only moves past lines whose
    events were uploaded successfully (or produced no event), so a failed
    upload is retried on the next pass instead of being dropped.

    A file that is now smaller than its saved offset is rescanned from the
    start. This may resubmit events; the module docstring states that the
    backend de-duplicates by content_hash.

    Returns:
        {"lines": <parsed records>, "ingested": <n>, "skipped": <n>}
    """
    state = _load_state(state_file)
    files_state: dict[str, Any] = state.setdefault("files", {})
    totals = {"lines": 0, "ingested": 0, "skipped": 0}

    for jsonl in _scan_files(projects_dir):
        key = str(jsonl)
        entry = files_state.setdefault(
            key, {"offset": 0, "session_id": _session_id_from_path(jsonl)},
        )
        try:
            size = jsonl.stat().st_size
        except OSError:
            continue
        if size < entry["offset"]:
            logger.warning("%s shrank below saved offset, rescanning from start", jsonl)
            entry["offset"] = 0
        if size == entry["offset"]:
            continue  # nothing new

        try:
            with jsonl.open("rb") as f:
                f.seek(entry["offset"])
                new_blob = f.read()
        except OSError as e:
            logger.warning("read %s failed: %s", jsonl, e)
            continue

        session_id = entry["session_id"]
        base = entry["offset"]
        consumed = 0      # bytes of new_blob walked so far
        committed = base  # offset that is safe to persist
        batch: list[dict] = []
        upload_ok = True

        for raw in _split_complete_lines(new_blob):
            # Count the real bytes so the offset cannot drift from the file.
            consumed += len(raw)
            try:
                record = json.loads(raw.decode("utf-8", errors="ignore"))
            except json.JSONDecodeError:
                record = None
            if isinstance(record, dict):
                totals["lines"] += 1
                # NOTE(review): records rarely carry "line_no", so the
                # fallback id is usually "line-0" — confirm this is intended.
                line_no = record.get("line_no", 0)
                ev = _build_event(record, session_id, line_no)
                if ev:
                    batch.append(ev)
            if batch and len(batch) >= batch_size:
                upload_ok = await _flush_pending(client, project_id, batch, totals)
                if not upload_ok:
                    break
            if not batch:
                # Nothing pending: everything walked so far is done.
                committed = base + consumed

        # Flush the tail batch.
        if upload_ok and batch:
            if await _flush_pending(client, project_id, batch, totals):
                committed = base + consumed

        entry["offset"] = committed
        files_state[key] = entry

    state["files"] = files_state
    _save_state(state_file, state)
    return totals
243
+
244
+
245
async def _run_loop(args: argparse.Namespace) -> None:
    """Run scan passes until stopped.

    Reads the target project from TOKENKNOWS_DEFAULT_PROJECT and exits
    with status 2 when it is unset. Each pass calls _scan_once; a failing
    pass is logged and the loop continues. With args.once the loop stops
    after the first pass; otherwise it sleeps args.interval seconds
    between passes and stops early on SIGTERM/SIGINT.

    Args:
        args: Parsed CLI options (projects_dir, state_file, interval,
            batch_size, once).
    """
    project_id = os.getenv("TOKENKNOWS_DEFAULT_PROJECT")
    if not project_id:
        logger.error("TOKENKNOWS_DEFAULT_PROJECT 未设置, 退出")
        sys.exit(2)
    client = TokenKnowsClient()

    projects_dir = Path(args.projects_dir)
    state_file = Path(args.state_file)
    interval = args.interval
    batch = args.batch_size

    logger.info(
        "watcher started: project=%s projects_dir=%s state=%s interval=%ds",
        project_id, projects_dir, state_file, interval,
    )

    # Graceful shutdown on SIGTERM/SIGINT.
    stop_event = asyncio.Event()
    # We are inside a coroutine, so ask for the running loop explicitly.
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGTERM, signal.SIGINT):
        try:
            loop.add_signal_handler(sig, stop_event.set)
        except NotImplementedError:  # windows
            pass

    while not stop_event.is_set():
        try:
            stats = await _scan_once(
                client, project_id, projects_dir, state_file, batch,
            )
            if stats["lines"] > 0:
                logger.info(
                    "scan tick · lines=%d ingested=%d skipped=%d",
                    stats["lines"], stats["ingested"], stats["skipped"],
                )
        except Exception as e:  # noqa: BLE001
            logger.exception("scan tick failed: %s", e)

        if args.once:
            break
        try:
            # Sleep for the interval, waking early if a stop was requested.
            await asyncio.wait_for(stop_event.wait(), timeout=interval)
        except asyncio.TimeoutError:
            pass

    logger.info("watcher stopped")
292
+
293
+
294
def _positive_int(value: str) -> int:
    """argparse type: parse value as an integer that must be >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {number}")
    return number


def main() -> None:
    """CLI entry point of the session watcher.

    Parses the command line, configures logging and runs _run_loop until
    it returns or the user interrupts it. --interval and --batch-size must
    be positive: an interval of 0 would make the loop spin without pause.
    """
    parser = argparse.ArgumentParser(description="tokenknows session watcher")
    parser.add_argument(
        "--projects-dir", default=str(DEFAULT_PROJECTS_DIR),
        help="Claude Code 项目目录 (默认 ~/.claude/projects)",
    )
    parser.add_argument(
        "--state-file", default=str(DEFAULT_STATE_FILE),
        help="watcher state json 路径 (默认 ~/.tokenknows-watcher.json)",
    )
    parser.add_argument(
        "--interval", type=_positive_int, default=DEFAULT_POLL_INTERVAL,
        help=f"轮询间隔秒 (默认 {DEFAULT_POLL_INTERVAL})",
    )
    parser.add_argument(
        "--batch-size", type=_positive_int, default=DEFAULT_BATCH_SIZE,
        help=f"单批最大 events (默认 {DEFAULT_BATCH_SIZE})",
    )
    parser.add_argument(
        "--once", action="store_true",
        help="只跑一次扫描就退出 (cron 模式)",
    )
    parser.add_argument(
        "--log-level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=args.log_level,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )

    try:
        asyncio.run(_run_loop(args))
    except KeyboardInterrupt:
        pass
330
+
331
+
332
+ if __name__ == "__main__":
333
+ main()
File without changes
File without changes
mcp_server/server.py ADDED
@@ -0,0 +1,383 @@
1
+ """tokenknows-mcp · v2.0 T117 · MCP server (FastMCP).
2
+
3
+ 为 Claude Code / Claude Cowork 等 MCP host 暴露 TokenKnows 蒸馏能力:
4
+ - tools: submit_session_events / distill_document / list_assets /
5
+ get_asset / get_asset_chapters / search_entity
6
+ - resources: tokenknows://asset/{id} 让 host 直接读 asset markdown
7
+ - prompts: 7 类蒸馏的标准 prompt 模板
8
+
9
+ 启动:
10
+ # stdio (Claude Code / Cowork 默认)
11
+ python -m mcp_server
12
+
13
+ # SSE (远程 / docker)
14
+ python -m mcp_server --transport sse --port 8765
15
+
16
+ 环境变量:
17
+ TOKENKNOWS_API_BASE backend URL (默认 http://127.0.0.1:8001)
18
+ TOKENKNOWS_API_TOKEN JWT bearer (可选)
19
+ TOKENKNOWS_DEFAULT_PROJECT 当前默认 project_id
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import os
25
+ from typing import Literal
26
+
27
+ from mcp.server.fastmcp import FastMCP
28
+
29
+ from mcp_server.client import get_client
30
+
31
+ # T143 (2026-05-25) · 三改 · 改用 Anthropic Progressive Disclosure pattern.
32
+ # 详细 "MUST call" 规则搬到 skills/session_capture/SKILL.md (L2 lazy load,
33
+ # 只在 LLM 觉得 task 相关时才进 context), MCP server instructions 只留必要
34
+ # 的 host 映射 + tool 清单, tool docstring 只剩 args/return.
35
+ # 这样 startup 注入小 (节省 context), 但 LLM 调 skill 时拿到完整规则.
36
+ _MCP_INSTRUCTIONS = """\
37
+ # TokenKnows MCP
38
+
39
+ This server bridges your session into the TokenKnows knowledge base.
40
+
41
+ ## Available tools
42
+
43
+ - `submit_session_events` — persist conversation turns. **See the
44
+ `session_capture` skill in this plugin for when/how to call it.**
45
+ - `distill_document(type, project_id?, time_window?)` — trigger backend
46
+ 5-stage pipeline to produce 1 of 7 document types (weekly_report /
47
+ tech_design / adr / incident / book / agent_skill / knowledge_graph).
48
+ See `distill` skill for the full flow.
49
+ - `list_assets` / `get_asset` / `get_asset_chapters` — read distilled output.
50
+ - `search_entity(query, entity_type?)` — cross-document KG entity search.
51
+
52
+ ## Host source_type quick map
53
+
54
+ - Cowork Chat / Cowork tab → pass `source_type="claude_cowork"`
55
+ - Claude Code CLI → leave `source_type` unset (defaults `"claude_code"`)
56
+ """
57
+
58
+ mcp = FastMCP("tokenknows", instructions=_MCP_INSTRUCTIONS)
59
+
60
+
61
def _default_project_id(override: str | None = None) -> str:
    """Resolve the project id to use.

    Precedence: a non-empty override, then the TOKENKNOWS_DEFAULT_PROJECT
    environment variable.

    Raises:
        ValueError: when neither source provides a value.
    """
    if override:
        return override
    from_env = os.getenv("TOKENKNOWS_DEFAULT_PROJECT")
    if from_env:
        return from_env
    raise ValueError(
        "未指定 project_id. 设置环境变量 TOKENKNOWS_DEFAULT_PROJECT 或在"
        "命令中传入 project_id 参数."
    )
70
+
71
+
72
+ # ── tools ────────────────────────────────────────────────────────
73
+
74
+
75
@mcp.tool()
async def submit_session_events(
    events: list[dict],
    project_id: str | None = None,
) -> dict:
    """Persist conversation events into TokenKnows backend.

    See the `session_capture` skill in this plugin for full call-timing
    rules and examples (lazy-loaded, ~50 tokens at startup, full body
    only when LLM determines relevance).

    Args:
        events: 1-100 events. Each item: {content (required), title?,
            author_name?, event_type? (default "ai_conversation_turn"),
            source_type? ("claude_cowork" in Cowork, default "claude_code"
            elsewhere), source_ref?, external_id? (auto-hash), tags?}.
        project_id: optional override; defaults TOKENKNOWS_DEFAULT_PROJECT.

    Returns:
        {"ingested": <new>, "skipped": <dup>, "project_id": "...",
         "rejected": <dropped before upload: no usable content or
                      beyond the 100-event cap>}
    """
    import hashlib
    from datetime import datetime, timezone
    from urllib.parse import quote

    pid = _default_project_id(project_id)
    now_iso = datetime.now(timezone.utc).isoformat()

    payload_events = []
    for ev in events[:100]:
        content = ev.get("content") if isinstance(ev, dict) else None
        if not isinstance(content, str) or not content:
            # content is required; slicing/encoding None would raise.
            continue
        # Without an explicit id, derive one from source_ref + content head.
        ext_id = ev.get("external_id") or hashlib.sha1(
            ((ev.get("source_ref") or "") + content[:200]).encode("utf-8"),
            usedforsecurity=False,
        ).hexdigest()[:16]
        author = {"name": ev["author_name"]} if ev.get("author_name") else None
        payload_events.append({
            "source_type": ev.get("source_type", "claude_code"),
            "source_ref": ev.get("source_ref", "claude-session"),
            "external_id": ext_id,
            "event_type": ev.get("event_type", "ai_conversation_turn"),
            "occurred_at": ev.get("occurred_at") or now_iso,
            "author": author,
            "title": ev.get("title"),
            "content": content,
            "content_hash": hashlib.sha256(content.encode("utf-8")).hexdigest(),
            "tags": ev.get("tags", []),
            "trust_score": ev.get("trust_score"),
        })

    rejected = len(events) - len(payload_events)
    if not payload_events:
        # Nothing valid to send: skip the round trip.
        return {"ingested": 0, "skipped": 0, "project_id": pid, "rejected": rejected}

    # Escape the id so it cannot add path segments or a query string.
    resp = await get_client().post(
        f"/api/v1/projects/{quote(pid, safe='')}/events",
        json={"events": payload_events},
    )
    return {
        "ingested": resp.get("ingested", 0),
        "skipped": resp.get("skipped", 0),
        "project_id": pid,
        "rejected": rejected,
    }
136
+
137
+
138
@mcp.tool()
async def distill_document(
    document_type: Literal[
        "weekly_report", "tech_design", "adr", "incident",
        "book", "agent_skill", "knowledge_graph",
    ],
    project_id: str | None = None,
    time_window: str = "this_week",
    model: str | None = None,
) -> dict:
    """触发 backend 5-stage pipeline 蒸馏 events → 文档.

    Args:
        document_type: 7 类之一 (周报 / 技术方案 / ADR / 复盘 / 书籍 /
            Skill / 知识图谱)
        project_id: 项目 id; 不传用 default
        time_window: 时间窗 (this_week/last_week/last_7_days/last_14_days/last_30_days)
        model: 显式指定 model (e.g. "claude-sonnet-4-6"); 不传走 task 默认

    Returns:
        {
            "asset_id": "...",
            "status": "generating",
            "title": "...",
            "view_url": "/projects/{pid}/documents/{aid}",
            "estimated_seconds": 60,
            "note": "可调 get_asset 轮询完成状态"
        }
    """
    # NOTE(review): the docstring above is left verbatim; it is presumably
    # published to MCP hosts as the tool description — confirm before editing.
    # Summary: POSTs {type, time_window[, model_override]} to the project's
    # assets/generate endpoint and returns the new asset's id/status/title.
    from urllib.parse import quote

    pid = _default_project_id(project_id)
    payload: dict = {"type": document_type, "time_window": time_window}
    if model:
        payload["model_override"] = model
    # Escape the id so it cannot add path segments or a query string.
    resp = await get_client().post(
        f"/api/v1/projects/{quote(pid, safe='')}/assets/generate", json=payload,
    )
    aid = resp["id"]
    return {
        "asset_id": aid,
        "status": resp["status"],
        "title": resp["title"],
        "view_url": f"/projects/{pid}/documents/{aid}",
        "estimated_seconds": 60,
        "note": "调 get_asset(asset_id) 查完成状态; status='draft' 即可读 markdown.",
    }
184
+
185
+
186
@mcp.tool()
async def list_assets(
    project_id: str | None = None,
    asset_type: str | None = None,
    status: str | None = None,
    limit: int = 20,
) -> dict:
    """列项目下的蒸馏文档.

    Args:
        project_id: 项目 id; 不传用 default
        asset_type: 过滤 weekly_report/tech_design/.../knowledge_graph
        status: 过滤 generating/draft/in_review/approved/published
        limit: 1-100, 默认 20
    """
    # NOTE(review): the docstring above is left verbatim; it is presumably
    # published to MCP hosts as the tool description — confirm before editing.
    # Summary: lists a project's assets, optionally filtered by type and
    # status, and returns {"total": <meta.total>, "items": [...]}.
    from urllib.parse import quote

    pid = _default_project_id(project_id)
    # Enforce the documented 1-100 range instead of forwarding any value.
    params: dict = {"limit": max(1, min(limit, 100))}
    if asset_type:
        params["type"] = asset_type
    if status:
        params["status"] = status
    # Escape the id so it cannot add path segments or a query string.
    resp = await get_client().get(
        f"/api/v1/projects/{quote(pid, safe='')}/assets", params=params,
    )

    # Trimmed view: id/type/title/status/version/metrics/kg counts only;
    # the thumbnail is left out because it is large.
    items = []
    for a in resp.get("data", []):
        item = {
            "id": a["id"],
            "type": a["type"],
            "title": a["title"],
            "status": a["status"],
            "version": a["current_version"],
            "updated_at": a["updated_at"],
        }
        if a.get("metrics"):
            item["metrics"] = a["metrics"]
        if a.get("kg_summary"):
            item["kg_summary"] = {
                "node_count": a["kg_summary"].get("node_count"),
                "edge_count": a["kg_summary"].get("edge_count"),
            }
        items.append(item)
    return {"total": resp.get("meta", {}).get("total", 0), "items": items}
229
+
230
+
231
@mcp.tool()
async def get_asset(asset_id: str) -> dict:
    """读单个 asset 元数据 (不含 chapter content).

    用于轮询 distill 完成状态. 完整内容用 get_asset_chapters 或读 resource.
    """
    # NOTE(review): the docstring above is left verbatim; it is presumably
    # published to MCP hosts as the tool description — confirm before editing.
    # Summary: fetches one asset and returns its metadata without chapters.
    from urllib.parse import quote

    # Escape the id so it cannot add path segments or a query string.
    a = await get_client().get(f"/api/v1/assets/{quote(asset_id, safe='')}")
    return {
        "id": a["id"],
        "type": a["type"],
        "title": a["title"],
        "status": a["status"],
        "version": a["current_version"],
        "approval_state": a["approval_state"],
        "metrics": a.get("metrics"),
        "kg_summary": a.get("kg_summary"),
        "updated_at": a["updated_at"],
    }
250
+
251
+
252
@mcp.tool()
async def get_asset_chapters(asset_id: str) -> list[dict]:
    """读 asset 的所有 chapter (含 markdown content + layout).

    对 knowledge_graph 类型, layout 含 nodes/edges/thumbnail_svg.
    对其它 6 类, content 是 markdown 正文.
    """
    # NOTE(review): the docstring is presumably published to MCP hosts as the
    # tool description. Its only change is the count of non-KG types (7 -> 6),
    # since knowledge_graph is itself one of the 7 document types.
    # Summary: returns every chapter; chapters whose layout has "nodes" also
    # get a "kg_layout" with just nodes and edges (no thumbnail).
    from urllib.parse import quote

    # Escape the id so it cannot add path segments or a query string.
    chs = await get_client().get(
        f"/api/v1/assets/{quote(asset_id, safe='')}/chapters",
    )
    out: list[dict] = []
    for c in chs:
        item = {
            "id": c["id"],
            "title": c["title"],
            "order_index": c["order_index"],
            "content": c["content"],
            "approval_state": c.get("approval_state", "pending"),
        }
        layout = c.get("layout")
        if isinstance(layout, dict) and "nodes" in layout:
            item["kg_layout"] = {
                "nodes": layout.get("nodes", []),
                "edges": layout.get("edges", []),
            }
        out.append(item)
    return out
280
+
281
+
282
@mcp.tool()
async def search_entity(
    query: str,
    project_id: str | None = None,
    entity_type: Literal["person", "event", "concept", "artifact"] | None = None,
    min_assets: int = 1,
) -> list[dict]:
    """跨文档实体搜索 (KG entity_registry).

    例: search_entity('Alice') → 返回 Alice 出现在哪些 KG asset 里.

    Args:
        query: label / aliases 模糊匹配
        project_id: 项目 id; 不传用 default
        entity_type: 过滤 person/event/concept/artifact
        min_assets: 仅返回出现在 ≥N 个 asset 的 (跨文档实体)
    """
    # NOTE(review): the docstring above is left verbatim; it is presumably
    # published to MCP hosts as the tool description — confirm before editing.
    # Summary: queries the project's entities endpoint with q/min_assets
    # (and type when given) and returns a trimmed dict per entity.
    from urllib.parse import quote

    pid = _default_project_id(project_id)
    params: dict = {"q": query, "min_assets": min_assets}
    if entity_type:
        params["type"] = entity_type
    # Escape the id so it cannot add path segments or a query string.
    entities = await get_client().get(
        f"/api/v1/projects/{quote(pid, safe='')}/entities", params=params,
    )
    return [
        {
            "id": e["id"],
            "type": e["type"],
            "label": e["label"],
            "aliases": e.get("aliases", []),
            "asset_count": e.get("asset_count", 0),
            "source_refs": e.get("source_refs", []),
        }
        for e in entities
    ]
318
+
319
+
320
+ # ── resources ─────────────────────────────────────────────────────
321
+
322
+
323
@mcp.resource("tokenknows://asset/{asset_id}")
async def asset_resource(asset_id: str) -> str:
    """以 markdown 形式读单个 asset (所有 chapter 拼接).

    Host (Claude) 可通过 @-mention 直接引用: @tokenknows://asset/demo-kg-001
    """
    # NOTE(review): the docstring above is left verbatim; it is presumably
    # published to MCP hosts as the resource description — confirm.
    # Summary: renders the asset as markdown — an H1 title, a type/status/
    # version line, then one H2 section per chapter.
    from urllib.parse import quote

    client = get_client()
    # Escape the id so it cannot add path segments or a query string.
    safe_id = quote(asset_id, safe="")
    asset = await client.get(f"/api/v1/assets/{safe_id}")
    chapters = await client.get(f"/api/v1/assets/{safe_id}/chapters")
    parts = [
        f"# {asset['title']}",
        "",
        f"_type={asset['type']} · status={asset['status']} · v{asset['current_version']}_",
        "",
    ]
    for c in chapters:
        # `or ""` keeps a null chapter body from breaking the join below.
        parts.extend([f"## {c['title']}", "", c.get("content") or "", ""])
    return "\n".join(parts)
336
+
337
+
338
+ # ── prompts ───────────────────────────────────────────────────────
339
+
340
+
341
@mcp.prompt()
def distill_session(document_type: str = "weekly_report") -> str:
    """模板: 把当前 session 蒸馏成指定文档类型.

    Args:
        document_type: weekly_report / tech_design / adr / incident / book /
            agent_skill / knowledge_graph
    """
    # NOTE(review): the docstring above is left verbatim; it is presumably
    # published to MCP hosts as the prompt description — confirm.
    # Returns a fixed instruction text with document_type substituted in two
    # places. The text walks the model through five steps: submit events,
    # call distill_document, poll get_asset, fetch chapters, and (for
    # knowledge_graph) run search_entity.
    return f"""请把我们这个 Claude session 的对话蒸馏成 **{document_type}** 类型文档:

1. 用 `submit_session_events` 工具把本次对话的关键节点 (用户的需求 / 你的方案 /
关键代码变更 / 决策与权衡) 整理成 3-10 条 event 提交;
2. 调 `distill_document(document_type='{document_type}')` 触发后端流水线;
3. 用 `get_asset(asset_id)` 轮询 status (≤60s 应变 'draft');
4. 完成后用 `get_asset_chapters` 拉 markdown 给我看;
5. 如果是 knowledge_graph 类型, 用 `search_entity` 查关键人物/概念跨文档出现.
"""
358
+
359
+
360
+ # ── entry ─────────────────────────────────────────────────────────
361
+
362
+
363
def main() -> None:
    """CLI entry point: parse --transport/--port and run the MCP server.

    The port is applied to the server settings only for the sse transport.
    """
    import argparse

    cli = argparse.ArgumentParser(description="TokenKnows MCP server")
    cli.add_argument(
        "--transport",
        choices=["stdio", "sse"],
        default="stdio",
        help="MCP transport (stdio for Claude Code/Cowork; sse for remote)",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=8765,
        help="SSE 端口 (仅 transport=sse 时)",
    )
    opts = cli.parse_args()

    transport = opts.transport
    if transport == "sse":
        mcp.settings.port = opts.port
    mcp.run(transport=transport)
380
+
381
+
382
+ if __name__ == "__main__":
383
+ main()
File without changes
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: tokenknows-mcp
3
+ Version: 0.2.1
4
+ Summary: TokenKnows MCP server — distill AI coding sessions (Claude Code / Codex / Cursor) into weekly reports, ADRs and a knowledge graph on your self-hosted workbench
5
+ Project-URL: Homepage, https://github.com/johnnywuj81/tokenknows
6
+ Project-URL: Repository, https://github.com/johnnywuj81/tokenknows
7
+ Project-URL: Issues, https://github.com/johnnywuj81/tokenknows/issues
8
+ Author: johnnywuj81
9
+ License-Expression: MIT
10
+ Keywords: claude-code,knowledge-graph,knowledge-management,mcp,model-context-protocol,self-hosted
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Software Development
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: httpx>=0.27.0
18
+ Requires-Dist: mcp>=1.2.0
19
+ Description-Content-Type: text/markdown
20
+
21
+ # TokenKnows MCP Server
22
+
23
+ <!-- mcp-name: io.github.johnnywuj81/tokenknows -->
24
+
25
+ MCP server for [TokenKnows](https://github.com/johnnywuj81/tokenknows) — a self-hosted engineering knowledge workbench that captures AI coding sessions (Claude Code / Codex / Cursor / VS Code) and distills them into structured documents: weekly reports, tech designs, ADRs, incident reviews, long-form books, agent skills and a knowledge graph, via a 5-stage LLM pipeline. Evidence-linked: every distilled claim links back to source session events.
26
+
27
+ ## Prerequisites
28
+
29
+ This server is the bridge between your MCP host and a **self-hosted TokenKnows backend** (default `http://127.0.0.1:8001`). Deploy the backend first — see the [main repository](https://github.com/johnnywuj81/tokenknows). Local-first: your data goes only to the backend you configure.
30
+
31
+ ## Install & run
32
+
33
+ ```bash
34
+ # Run directly (stdio, for Claude Code / Cowork / Cursor)
35
+ uvx tokenknows-mcp
36
+
37
+ # Or install then run
38
+ pip install tokenknows-mcp
39
+ tokenknows-mcp
40
+
41
+ # SSE transport for remote / docker setups
42
+ tokenknows-mcp --transport sse --port 8765
43
+ ```
44
+
45
+ ### Claude Code config example
46
+
47
+ ```json
48
+ {
49
+ "mcpServers": {
50
+ "tokenknows": {
51
+ "command": "uvx",
52
+ "args": ["tokenknows-mcp"],
53
+ "env": { "TOKENKNOWS_API_BASE": "http://127.0.0.1:8001" }
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ Tip: in Claude Code you can instead install the full plugin (MCP server + slash commands + skills): `/plugin marketplace add johnnywuj81/tokenknows` → `/plugin install tokenknows@tokenknows`.
60
+
61
+ ## Environment variables
62
+
63
+ | Variable | Default | Description |
64
+ |---|---|---|
65
+ | `TOKENKNOWS_API_BASE` | `http://127.0.0.1:8001` | Self-hosted TokenKnows backend URL |
66
+ | `TOKENKNOWS_API_TOKEN` | — | JWT bearer token (optional) |
67
+ | `TOKENKNOWS_DEFAULT_PROJECT` | — | Default project_id for event submission |
68
+
69
+ ## Tools
70
+
71
+ - `submit_session_events` — persist conversation turns into the knowledge base
72
+ - `distill_document` — trigger the 5-stage pipeline (weekly_report / tech_design / adr / incident / book / agent_skill / knowledge_graph)
73
+ - `list_assets` / `get_asset` / `get_asset_chapters` — read distilled output
74
+ - `search_entity` — cross-document knowledge-graph entity search
75
+
76
+ Plus `tokenknows://asset/{id}` resources and a `distill_session` prompt template that accepts any of the 7 document types.
77
+
78
+ ## License
79
+
80
+ [MIT](https://github.com/johnnywuj81/tokenknows/blob/main/LICENSE) — source of truth for this package lives in [`code/tokenknows-api/mcp_server`](https://github.com/johnnywuj81/tokenknows/tree/main/code/tokenknows-api/mcp_server).
@@ -0,0 +1,12 @@
1
+ mcp_server/__init__.py,sha256=gLZCqfNf5-hP9bVnMTF-nSY7a6tm10vsyHE_zBMQTj8,146
2
+ mcp_server/__main__.py,sha256=hwPnEb6KNblf1zStE03O92_RfUFcv2w3F3Yddh8GPBY,110
3
+ mcp_server/client.py,sha256=4Yr1zTZ_q5SK6MM9XU5BSwx8XX3Vlj-OtSb3oykQVW8,2736
4
+ mcp_server/daemon.py,sha256=yaNkUlinboK4XCtWe_38EqTWmRocPZkMPrsZzENTNrA,11005
5
+ mcp_server/server.py,sha256=2WGr1qsUDhsD4AfKSnDoT_wZsmthlqTqkJhss2z7Wk4,13760
6
+ mcp_server/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ mcp_server/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ mcp_server/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ tokenknows_mcp-0.2.1.dist-info/METADATA,sha256=-LeWL_rsh7ZrS3MAUHWaQqCXCovg5_-f1bGnivQAHGY,3456
10
+ tokenknows_mcp-0.2.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ tokenknows_mcp-0.2.1.dist-info/entry_points.txt,sha256=dEYHp5rbVQ4QIzJpMIy0qKIuzBW8W7nPbtsk3HS1k-k,58
12
+ tokenknows_mcp-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ tokenknows-mcp = mcp_server.server:main