PyPI - tokenknows-mcp - Versions diffs - 0.2.1__py3-none-any.whl - Mend

tokenknows-mcp 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

mcp_server/__init__.py +5 -0
mcp_server/__main__.py +6 -0
mcp_server/client.py +86 -0
mcp_server/daemon.py +333 -0
mcp_server/prompts/__init__.py +0 -0
mcp_server/resources/__init__.py +0 -0
mcp_server/server.py +383 -0
mcp_server/tools/__init__.py +0 -0
tokenknows_mcp-0.2.1.dist-info/METADATA +80 -0
tokenknows_mcp-0.2.1.dist-info/RECORD +12 -0
tokenknows_mcp-0.2.1.dist-info/WHEEL +4 -0
tokenknows_mcp-0.2.1.dist-info/entry_points.txt +2 -0

mcp_server/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""tokenknows-mcp · v2.0 · 把 TokenKnows 包成 MCP server 接入 Claude Code/Cowork."""
+from mcp_server.server import mcp
+__all__ = ["mcp"]

mcp_server/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Entry: python -m mcp_server."""
+from mcp_server.server import main
+if __name__ == "__main__":
+    main()

mcp_server/client.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""TokenKnows backend HTTP client · MCP server 内部用.
+设计原则:
+  - 同进程跑可直接 import backend service (zero-network); 跨进程跑走 HTTP
+  - 默认 HTTP (8001) 让 plugin 可独立于 backend 部署
+  - timeout 默认 60s (DEFAULT_TIMEOUT); backend pipeline 长 (LLM call) 30-60s, 用户用 distill 命令时
+    显式说"约 1 分钟", 不阻塞 MCP request 默认超时
+"""
+from __future__ import annotations
+import os
+from typing import Any
+import httpx
+DEFAULT_TIMEOUT = 60.0
+class TokenKnowsClient:
+    """Thin async HTTP wrapper around the tokenknows-api backend.
+    Each call opens a short-lived ``httpx.AsyncClient``, raises on an error
+    status via ``raise_for_status`` and returns the decoded JSON body.
+    Example:
+        client = TokenKnowsClient()
+        asset = await client.post('/api/v1/projects/p1/assets/generate',
+                                  json={'type': 'weekly_report'})
+    """
+    def __init__(
+        self,
+        base_url: str | None = None,
+        auth_token: str | None = None,
+        timeout: float = DEFAULT_TIMEOUT,
+    ) -> None:
+        # Precedence for both settings: explicit argument, then environment.
+        resolved_url = (
+            base_url or os.getenv("TOKENKNOWS_API_BASE") or "http://127.0.0.1:8001"
+        )
+        token = auth_token or os.getenv("TOKENKNOWS_API_TOKEN")
+        self.base_url = resolved_url.rstrip("/")
+        self.auth_token = token
+        self.timeout = timeout
+        self._headers: dict[str, str] = {"Content-Type": "application/json"}
+        if token:
+            self._headers["Authorization"] = f"Bearer {token}"
+    async def _send(self, verb: str, path: str, **options: Any) -> Any:
+        """Call the httpx method named ``verb`` on ``base_url + path``; return the JSON body."""
+        async with httpx.AsyncClient(timeout=self.timeout) as session:
+            response = await getattr(session, verb)(
+                f"{self.base_url}{path}", headers=self._headers, **options,
+            )
+            response.raise_for_status()
+            return response.json()
+    async def get(self, path: str, params: dict | None = None) -> Any:
+        """GET ``path`` with optional query parameters."""
+        return await self._send("get", path, params=params)
+    async def post(self, path: str, json: dict | None = None) -> Any:
+        """POST ``path``; a missing or empty body is sent as ``{}``."""
+        return await self._send("post", path, json=json or {})
+    async def patch(self, path: str, json: dict | None = None) -> Any:
+        """PATCH ``path``; a missing or empty body is sent as ``{}``."""
+        return await self._send("patch", path, json=json or {})
+_default_client: TokenKnowsClient | None = None
+def get_client() -> TokenKnowsClient:
+    """Return the process-wide client, creating it on first use (tests may monkeypatch)."""
+    global _default_client
+    cached = _default_client
+    if cached is None:
+        cached = TokenKnowsClient()
+        _default_client = cached
+    return cached
+def set_client(client: TokenKnowsClient) -> None:
+    """Replace the module-level client returned by ``get_client`` (test injection hook)."""
+    global _default_client
+    _default_client = client

mcp_server/daemon.py ADDED Viewed

@@ -0,0 +1,333 @@
+"""v2.0 T118 · session-watcher daemon.
+后台监听 ~/.claude/projects/*/*.jsonl, 增量解析新 line → 上报 events
+到 tokenknows-api backend. 配合 MCP server 形成"会话即素材"的双轨:
+  - 用户主动 /tokenknows:weekly  → MCP 同步蒸馏 (T117)
+  - 后台 daemon 持续累积 events → 等用户随时蒸馏 (T118)
+启动:
+    python -m mcp_server.daemon                # 默认 poll 30s
+    python -m mcp_server.daemon --interval 60  # 自定义
+    python -m mcp_server.daemon --once         # 只跑一次 (cron 模式)
+State 文件: ~/.tokenknows-watcher.json
+  { "files": { "<jsonl_path>": { "offset": <byte_offset>, "session_id": "..." } } }
+dedup: external_id = f"{session_id}-{msg_uuid}" (无 id/uuid 时用 line-{line_no} 兜底), backend 按 content_hash 去重.
+"""
+from __future__ import annotations
+import argparse
+import asyncio
+import hashlib
+import json
+import logging
+import os
+import re
+import signal
+import sys
+import time
+from pathlib import Path
+from typing import Any
+from mcp_server.client import TokenKnowsClient
+logger = logging.getLogger("tokenknows-watcher")
+DEFAULT_PROJECTS_DIR = Path.home() / ".claude" / "projects"
+DEFAULT_STATE_FILE = Path.home() / ".tokenknows-watcher.json"
+DEFAULT_POLL_INTERVAL = 30
+DEFAULT_BATCH_SIZE = 50
+# 仅处理这两种 type (其它如 attachment/queue-operation/system 是 noise)
+_VALID_TYPES = {"user", "assistant"}
+def _load_state(path: Path) -> dict[str, Any]:
+    """Load the watcher state, falling back to an empty state when unusable.
+    Args:
+        path: location of the JSON state file.
+    Returns:
+        The parsed state dict. ``{"files": {}}`` is returned when the file is
+        missing, unreadable, not valid JSON, or not shaped like a state object.
+    """
+    try:
+        state = json.loads(path.read_text(encoding="utf-8"))
+    except FileNotFoundError:
+        # First run: no state yet, nothing to warn about.
+        return {"files": {}}
+    except (OSError, ValueError) as e:
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+        logger.warning("state file corrupt, resetting: %s", e)
+        return {"files": {}}
+    # Valid JSON of the wrong shape (e.g. a list) would otherwise crash the
+    # scanner when it calls state.setdefault("files", {}).
+    if not isinstance(state, dict) or not isinstance(state.get("files", {}), dict):
+        logger.warning("state file corrupt, resetting: %s", "unexpected structure")
+        return {"files": {}}
+    return state
+def _save_state(path: Path, state: dict) -> None:
+    """Persist ``state`` as indented JSON at ``path``, replacing it atomically.
+    The JSON is written to a sibling ``<name>.tmp`` file and then moved over
+    the target with ``os.replace``, so an interrupted write cannot leave a
+    truncated state file behind (which would reset every saved offset).
+    Args:
+        path: destination file; missing parent directories are created.
+        state: JSON-serialisable watcher state.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = path.with_name(path.name + ".tmp")
+    tmp_path.write_text(json.dumps(state, indent=2), encoding="utf-8")
+    os.replace(tmp_path, path)
+def _extract_text(message: dict) -> str:
+    """Flatten ``message["content"]`` (a string or a list of blocks) into plain text.
+    Text blocks contribute their text, tool_use blocks a ``[tool_use: <name>]``
+    marker, tool_result blocks a marker holding at most 200 characters of the
+    result. Dict blocks of any other type are dropped; non-dict list items are
+    stringified. Non-empty pieces are joined with newlines.
+    """
+    content = message.get("content", "")
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content)
+    def _render(block: Any) -> Any:
+        """Render one content block; a falsy result means "emit nothing"."""
+        if not isinstance(block, dict):
+            return str(block)
+        kind = block.get("type")
+        if kind == "text":
+            return block.get("text", "")
+        if kind == "tool_use":
+            # The tool call itself is kept out of the body; only its name is recorded.
+            tool_name = block.get("name", "?")
+            return f"[tool_use: {tool_name}]"
+        if kind == "tool_result":
+            payload = block.get("content", "")
+            if isinstance(payload, list):
+                payload = "".join(
+                    item.get("text", "") if isinstance(item, dict) else str(item)
+                    for item in payload
+                )
+            return f"[tool_result: {str(payload)[:200]}]"
+        return ""
+    rendered = [_render(block) for block in content]
+    return "\n".join(piece for piece in rendered if piece)
+def _build_event(
+    record: dict, session_id: str, line_no: int,
+) -> dict[str, Any] | None:
+    """Build an event payload dict from one parsed jsonl record.
+    Args:
+        record: one decoded line of a session jsonl file.
+        session_id: id of the session the record belongs to.
+        line_no: fallback used in ``external_id`` when the record has no id.
+    Returns:
+        The event dict, or ``None`` when the record should be skipped: its
+        type is not in ``_VALID_TYPES``, its ``message`` is not an object, or
+        its text is shorter than 5 characters after stripping.
+    """
+    rec_type = record.get("type")
+    if not isinstance(rec_type, str) or rec_type not in _VALID_TYPES:
+        return None
+    # "message": null (or any non-object) must be skipped, not crash the scan:
+    # an exception here aborts the whole tick before offsets are saved.
+    message = record.get("message")
+    if not isinstance(message, dict):
+        return None
+    text = _extract_text(message)
+    if not text or len(text.strip()) < 5:  # too short to be meaningful
+        return None
+    role = message.get("role", rec_type)
+    timestamp = record.get("timestamp") or message.get("created_at")
+    # external_id: session id plus the message id / record uuid, else the line number.
+    msg_uuid = message.get("id") or record.get("uuid") or f"line-{line_no}"
+    external_id = f"{session_id}-{msg_uuid}"
+    title = text.strip().splitlines()[0][:60]
+    return {
+        "source_type": "claude_code",
+        "source_ref": session_id,
+        "external_id": external_id,
+        "event_type": "ai_conversation_turn",
+        "occurred_at": timestamp,
+        "author": {"name": "user" if role == "user" else "Claude"},
+        "title": title,
+        "content": text[:4000],  # truncated; the hash below covers the full text
+        "content_hash": hashlib.sha256(text.encode("utf-8")).hexdigest(),
+        "tags": ["claude-code-session", role],
+        "trust_score": 0.8 if role == "user" else 0.6,
+    }
+def _scan_files(projects_dir: Path) -> list[Path]:
+    """Return, sorted, every ``*.jsonl`` file sitting directly inside a subdirectory of ``projects_dir``.
+    A missing ``projects_dir`` yields an empty list; files at the top level of
+    ``projects_dir`` itself are not considered.
+    """
+    if not projects_dir.exists():
+        return []
+    return sorted(
+        session_file
+        for project in projects_dir.iterdir()
+        if project.is_dir()
+        for session_file in project.glob("*.jsonl")
+    )
+def _session_id_from_path(p: Path) -> str:
+    """Return the session id: the jsonl file name with its final suffix removed."""
+    return p.stem
+async def _flush_batch(
+    client: TokenKnowsClient, project_id: str, events: list[dict],
+) -> tuple[int, int]:
+    """POST ``events`` to the project's events endpoint.
+    Returns:
+        ``(ingested, skipped)`` as reported by the backend. ``(0, 0)`` is
+        returned for an empty batch and also when the call fails; failures
+        are logged as warnings rather than raised.
+    """
+    if not events:
+        return (0, 0)
+    endpoint = f"/api/v1/projects/{project_id}/events"
+    try:
+        reply = await client.post(endpoint, json={"events": events})
+        counts = (reply.get("ingested", 0), reply.get("skipped", 0))
+    except Exception as err:  # noqa: BLE001
+        logger.warning("ingest failed (will retry next tick): %s", err)
+        counts = (0, 0)
+    return counts
+async def _scan_once(
+    client: TokenKnowsClient,
+    project_id: str,
+    projects_dir: Path,
+    state_file: Path,
+    batch_size: int,
+) -> dict[str, int]:
+    """Run one scan pass: read newly appended lines of every jsonl file and submit them.
+    For each file the bytes after the saved offset are read, cut at the last
+    newline (a trailing partial line is still being written) and split on
+    ``b"\\n"`` only. The saved offset advances past every complete line once
+    the events built from those lines were delivered; when a POST fails the
+    offset stays at the last delivered batch so the remainder is retried on
+    the next pass.
+    Args:
+        client: backend client used to POST event batches.
+        project_id: project the events are submitted to.
+        projects_dir: directory whose subdirectories hold the jsonl files.
+        state_file: JSON file holding per-file offsets; rewritten at the end.
+        batch_size: number of events per POST.
+    Returns:
+        ``{"lines": ..., "ingested": ..., "skipped": ...}`` totals for this pass.
+    """
+    state = _load_state(state_file)
+    files_state: dict[str, Any] = state.setdefault("files", {})
+    totals = {"lines": 0, "ingested": 0, "skipped": 0}
+    async def _deliver(events: list[dict]) -> bool:
+        """POST one batch; log and return False when the backend call fails."""
+        # Posts directly (instead of swallowing the error) so that the caller
+        # can tell a failed delivery apart from "0 ingested, 0 skipped".
+        try:
+            resp = await client.post(
+                f"/api/v1/projects/{project_id}/events",
+                json={"events": events},
+            )
+        except Exception as e:  # noqa: BLE001
+            logger.warning("ingest failed (will retry next tick): %s", e)
+            return False
+        if isinstance(resp, dict):
+            totals["ingested"] += resp.get("ingested", 0)
+            totals["skipped"] += resp.get("skipped", 0)
+        return True
+    for jsonl in _scan_files(projects_dir):
+        key = str(jsonl)
+        entry = files_state.setdefault(
+            key, {"offset": 0, "session_id": _session_id_from_path(jsonl)},
+        )
+        try:
+            size = jsonl.stat().st_size
+        except OSError:
+            continue
+        if size <= entry["offset"]:
+            continue  # nothing appended since the last pass
+        try:
+            with jsonl.open("rb") as f:
+                f.seek(entry["offset"])
+                new_blob = f.read()
+        except OSError as e:
+            logger.warning("read %s failed: %s", jsonl, e)
+            continue
+        # Only bytes up to the last newline are complete lines.
+        complete_len = new_blob.rfind(b"\n") + 1
+        if complete_len == 0:
+            continue
+        session_id = entry["session_id"]
+        batch: list[dict] = []
+        consumed = 0   # bytes of complete lines examined so far
+        committed = 0  # bytes that are safe to skip on the next pass
+        delivered = True
+        # Split the raw bytes on b"\n": str.splitlines would also break on
+        # U+2028/U+2029/\x0b/\x0c inside a JSON string, and byte counts taken
+        # from re-encoded text drift when undecodable bytes were dropped.
+        for raw in new_blob[:complete_len].split(b"\n")[:-1]:
+            consumed += len(raw) + 1
+            try:
+                record = json.loads(raw.decode("utf-8", errors="ignore"))
+            except json.JSONDecodeError:
+                continue
+            if not isinstance(record, dict):
+                continue
+            totals["lines"] += 1
+            line_no = record.get("line_no", 0)  # 0 when the record carries none
+            ev = _build_event(record, session_id, line_no)
+            if ev:
+                batch.append(ev)
+            if batch and len(batch) >= batch_size:
+                if not await _deliver(batch):
+                    delivered = False
+                    break
+                batch = []
+                committed = consumed
+        if delivered and batch:
+            delivered = await _deliver(batch)
+        if delivered:
+            # Also skips trailing lines that produced no event, so files made
+            # of ignored record types are not re-read on every pass.
+            committed = consumed
+        entry["offset"] += committed
+        files_state[key] = entry
+    state["files"] = files_state
+    _save_state(state_file, state)
+    return totals
+async def _run_loop(args: argparse.Namespace) -> None:
+    """Run scan passes until stopped.
+    Exits the process with status 2 when ``TOKENKNOWS_DEFAULT_PROJECT`` is not
+    set. Otherwise scans repeatedly, waiting ``args.interval`` seconds between
+    passes, until SIGTERM/SIGINT arrives; with ``args.once`` a single pass is
+    run. A failing pass is logged and does not end the loop.
+    Args:
+        args: parsed CLI options (``projects_dir``, ``state_file``,
+            ``interval``, ``batch_size``, ``once``).
+    """
+    project_id = os.getenv("TOKENKNOWS_DEFAULT_PROJECT")
+    if not project_id:
+        logger.error("TOKENKNOWS_DEFAULT_PROJECT 未设置, 退出")
+        sys.exit(2)
+    client = TokenKnowsClient()
+    projects_dir = Path(args.projects_dir)
+    state_file = Path(args.state_file)
+    interval = args.interval
+    batch = args.batch_size
+    logger.info(
+        "watcher started: project=%s projects_dir=%s state=%s interval=%ds",
+        project_id, projects_dir, state_file, interval,
+    )
+    # Graceful shutdown on SIGTERM/SIGINT.
+    stop_event = asyncio.Event()
+    # get_running_loop() is the documented way to reach the loop from a coroutine.
+    loop = asyncio.get_running_loop()
+    for sig in (signal.SIGTERM, signal.SIGINT):
+        try:
+            loop.add_signal_handler(sig, stop_event.set)
+        except NotImplementedError:  # windows
+            pass
+    while not stop_event.is_set():
+        try:
+            stats = await _scan_once(
+                client, project_id, projects_dir, state_file, batch,
+            )
+            if stats["lines"] > 0:
+                logger.info(
+                    "scan tick · lines=%d ingested=%d skipped=%d",
+                    stats["lines"], stats["ingested"], stats["skipped"],
+                )
+        except Exception as e:  # noqa: BLE001
+            logger.exception("scan tick failed: %s", e)
+        if args.once:
+            break
+        # Sleep for one interval, waking early when a stop signal arrives.
+        try:
+            await asyncio.wait_for(stop_event.wait(), timeout=interval)
+        except asyncio.TimeoutError:
+            pass
+    logger.info("watcher stopped")
+def main() -> None:
+    """CLI entry point: parse options, configure logging and run the watcher loop."""
+    parser = argparse.ArgumentParser(description="tokenknows session watcher")
+    # One (flag, keyword-arguments) pair per option, registered in order.
+    option_table = (
+        ("--projects-dir", {
+            "default": str(DEFAULT_PROJECTS_DIR),
+            "help": "Claude Code 项目目录 (默认 ~/.claude/projects)",
+        }),
+        ("--state-file", {
+            "default": str(DEFAULT_STATE_FILE),
+            "help": "watcher state json 路径 (默认 ~/.tokenknows-watcher.json)",
+        }),
+        ("--interval", {
+            "type": int,
+            "default": DEFAULT_POLL_INTERVAL,
+            "help": f"轮询间隔秒 (默认 {DEFAULT_POLL_INTERVAL})",
+        }),
+        ("--batch-size", {
+            "type": int,
+            "default": DEFAULT_BATCH_SIZE,
+            "help": f"单批最大 events (默认 {DEFAULT_BATCH_SIZE})",
+        }),
+        ("--once", {
+            "action": "store_true",
+            "help": "只跑一次扫描就退出 (cron 模式)",
+        }),
+        ("--log-level", {
+            "default": "INFO",
+            "choices": ["DEBUG", "INFO", "WARNING", "ERROR"],
+        }),
+    )
+    for flag, settings in option_table:
+        parser.add_argument(flag, **settings)
+    options = parser.parse_args()
+    logging.basicConfig(
+        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
+        level=options.log_level,
+    )
+    try:
+        asyncio.run(_run_loop(options))
+    except KeyboardInterrupt:
+        # Ctrl-C is a normal way to stop the watcher; exit quietly.
+        pass
+if __name__ == "__main__":
+    main()

mcp_server/prompts/__init__.py ADDED Viewed

File without changes

mcp_server/resources/__init__.py ADDED Viewed

File without changes

mcp_server/server.py ADDED Viewed

@@ -0,0 +1,383 @@
+"""tokenknows-mcp · v2.0 T117 · MCP server (FastMCP).
+为 Claude Code / Claude Cowork 等 MCP host 暴露 TokenKnows 蒸馏能力:
+  - tools: submit_session_events / distill_document / list_assets /
+           get_asset / get_asset_chapters / search_entity
+  - resources: tokenknows://asset/{id} 让 host 直接读 asset markdown
+  - prompts: 7 类蒸馏的标准 prompt 模板
+启动:
+    # stdio (Claude Code / Cowork 默认)
+    python -m mcp_server
+    # SSE (远程 / docker)
+    python -m mcp_server --transport sse --port 8765
+环境变量:
+    TOKENKNOWS_API_BASE      backend URL (默认 http://127.0.0.1:8001)
+    TOKENKNOWS_API_TOKEN     JWT bearer (可选)
+    TOKENKNOWS_DEFAULT_PROJECT  当前默认 project_id
+"""
+from __future__ import annotations
+import os
+from typing import Literal
+from mcp.server.fastmcp import FastMCP
+from mcp_server.client import get_client
+# T143 (2026-05-25) · 三改 · 改用 Anthropic Progressive Disclosure pattern.
+# 详细 "MUST call" 规则搬到 skills/session_capture/SKILL.md (L2 lazy load,
+# 只在 LLM 觉得 task 相关时才进 context), MCP server instructions 只留必要
+# 的 host 映射 + tool 清单, tool docstring 只剩 args/return.
+# 这样 startup 注入小 (节省 context), 但 LLM 调 skill 时拿到完整规则.
+_MCP_INSTRUCTIONS = """\
+# TokenKnows MCP
+This server bridges your session into the TokenKnows knowledge base.
+## Available tools
+- `submit_session_events` — persist conversation turns. **See the
+  `session_capture` skill in this plugin for when/how to call it.**
+- `distill_document(type, project_id?, time_window?)` — trigger backend
+  5-stage pipeline to produce 1 of 7 document types (weekly_report /
+  tech_design / adr / incident / book / agent_skill / knowledge_graph).
+  See `distill` skill for the full flow.
+- `list_assets` / `get_asset` / `get_asset_chapters` — read distilled output.
+- `search_entity(query, entity_type?)` — cross-document KG entity search.
+## Host source_type quick map
+- Cowork Chat / Cowork tab → pass `source_type="claude_cowork"`
+- Claude Code CLI → leave `source_type` unset (defaults `"claude_code"`)
+"""
+mcp = FastMCP("tokenknows", instructions=_MCP_INSTRUCTIONS)
+def _default_project_id(override: str | None = None) -> str:
+    """Resolve the project id: explicit ``override`` first, then the environment.
+    Raises:
+        ValueError: neither ``override`` nor ``TOKENKNOWS_DEFAULT_PROJECT`` is set.
+    """
+    resolved = override or os.getenv("TOKENKNOWS_DEFAULT_PROJECT")
+    if resolved:
+        return resolved
+    raise ValueError(
+        "未指定 project_id. 设置环境变量 TOKENKNOWS_DEFAULT_PROJECT 或在"
+        "命令中传入 project_id 参数."
+    )
+# ── tools ────────────────────────────────────────────────────────
+@mcp.tool()
+async def submit_session_events(
+    events: list[dict],
+    project_id: str | None = None,
+) -> dict:
+    """Persist conversation events into TokenKnows backend.
+    See the `session_capture` skill in this plugin for full call-timing
+    rules and examples (lazy-loaded, ~50 tokens at startup, full body
+    only when LLM determines relevance).
+    Args:
+        events: 1-100 events. Each item: {content (required), title?,
+            author_name?, event_type? (default "ai_conversation_turn"),
+            source_type? ("claude_cowork" in Cowork, default "claude_code"
+            elsewhere), source_ref?, external_id? (auto-hash), tags?}.
+        project_id: optional override; defaults TOKENKNOWS_DEFAULT_PROJECT.
+    Returns:
+        {"ingested": <new>, "skipped": <dup>, "project_id": "..."}
+    """
+    # NOTE(review): the docstring above is left untouched on purpose; FastMCP
+    # presumably publishes it as the tool description - confirm before editing.
+    import hashlib
+    from datetime import datetime, timezone
+    pid = _default_project_id(project_id)
+    client = get_client()
+    now_iso = datetime.now(timezone.utc).isoformat()
+    accepted = events[:100]  # only the first 100 events are submitted per call
+    payload_events = []
+    for ev in accepted:
+        # Hosts may send explicit nulls; normalise so slicing/hashing cannot
+        # raise TypeError on None.
+        content = ev.get("content") or ""
+        if not isinstance(content, str):
+            content = str(content)
+        ref_for_hash = str(ev.get("source_ref") or "")
+        # Fallback id: 16 hex chars of SHA-1 over source_ref + first 200 chars.
+        ext_id = ev.get("external_id") or hashlib.sha1(
+            (ref_for_hash + content[:200]).encode("utf-8"),
+            usedforsecurity=False,
+        ).hexdigest()[:16]
+        author = None
+        if ev.get("author_name"):
+            author = {"name": ev["author_name"]}
+        payload_events.append({
+            "source_type": ev.get("source_type", "claude_code"),
+            "source_ref": ev.get("source_ref", "claude-session"),
+            "external_id": ext_id,
+            "event_type": ev.get("event_type", "ai_conversation_turn"),
+            "occurred_at": ev.get("occurred_at") or now_iso,
+            "author": author,
+            "title": ev.get("title"),
+            "content": content,
+            "content_hash": hashlib.sha256(content.encode("utf-8")).hexdigest(),
+            "tags": ev.get("tags", []),
+            "trust_score": ev.get("trust_score"),
+        })
+    resp = await client.post(
+        f"/api/v1/projects/{pid}/events",
+        json={"events": payload_events},
+    )
+    return {
+        "ingested": resp.get("ingested", 0),
+        "skipped": resp.get("skipped", 0),
+        "project_id": pid,
+        # Additive key: how many events past the first 100 were NOT submitted,
+        # so the caller can resend them instead of assuming they were stored.
+        "truncated": len(events) - len(accepted),
+    }
+@mcp.tool()
+async def distill_document(
+    document_type: Literal[
+        "weekly_report", "tech_design", "adr", "incident",
+        "book", "agent_skill", "knowledge_graph",
+    ],
+    project_id: str | None = None,
+    time_window: str = "this_week",
+    model: str | None = None,
+) -> dict:
+    """触发 backend 5-stage pipeline 蒸馏 events → 文档.
+    Args:
+        document_type: 7 类之一 (周报 / 技术方案 / ADR / 复盘 / 书籍 /
+                       Skill / 知识图谱)
+        project_id: 项目 id; 不传用 default
+        time_window: 时间窗 (this_week/last_week/last_7_days/last_14_days/last_30_days)
+        model: 显式指定 model (e.g. "claude-sonnet-4-6"); 不传走 task 默认
+    Returns:
+        {
+          "asset_id": "...",
+          "status": "generating",
+          "title": "...",
+          "view_url": "/projects/{pid}/documents/{aid}",
+          "estimated_seconds": 60,
+          "note": "可调 get_asset 轮询完成状态"
+        }
+    """
+    # NOTE(review): docstring kept verbatim; FastMCP presumably publishes it
+    # as the tool description - confirm before editing.
+    pid = _default_project_id(project_id)
+    client = get_client()
+    # model_override is only sent when the caller named a model.
+    request_body: dict = {
+        "type": document_type,
+        "time_window": time_window,
+        **({"model_override": model} if model else {}),
+    }
+    created = await client.post(
+        f"/api/v1/projects/{pid}/assets/generate", json=request_body,
+    )
+    asset_id = created["id"]
+    summary = {
+        "asset_id": asset_id,
+        "status": created["status"],
+        "title": created["title"],
+    }
+    summary["view_url"] = f"/projects/{pid}/documents/{asset_id}"
+    summary["estimated_seconds"] = 60
+    summary["note"] = "调 get_asset(asset_id) 查完成状态; status='draft' 即可读 markdown."
+    return summary
+@mcp.tool()
+async def list_assets(
+    project_id: str | None = None,
+    asset_type: str | None = None,
+    status: str | None = None,
+    limit: int = 20,
+) -> dict:
+    """列项目下的蒸馏文档.
+    Args:
+        project_id: 项目 id; 不传用 default
+        asset_type: 过滤 weekly_report/tech_design/.../knowledge_graph
+        status: 过滤 generating/draft/in_review/approved/published
+        limit: 1-100, 默认 20
+    """
+    # NOTE(review): docstring kept verbatim; FastMCP presumably publishes it
+    # as the tool description - confirm before editing.
+    pid = _default_project_id(project_id)
+    # Enforce the documented 1-100 range instead of forwarding any integer.
+    limit = max(1, min(100, limit))
+    params: dict = {"limit": limit}
+    if asset_type:
+        params["type"] = asset_type
+    if status:
+        params["status"] = status
+    client = get_client()
+    resp = await client.get(f"/api/v1/projects/{pid}/assets", params=params)
+    # Trimmed result: id/type/title/status/version/updated_at plus optional
+    # metrics and kg_summary counts; other fields of the asset are not copied.
+    items = []
+    for a in resp.get("data", []):
+        item = {
+            "id": a["id"],
+            "type": a["type"],
+            "title": a["title"],
+            "status": a["status"],
+            "version": a["current_version"],
+            "updated_at": a["updated_at"],
+        }
+        if a.get("metrics"):
+            item["metrics"] = a["metrics"]
+        if a.get("kg_summary"):
+            item["kg_summary"] = {
+                "node_count": a["kg_summary"].get("node_count"),
+                "edge_count": a["kg_summary"].get("edge_count"),
+            }
+        items.append(item)
+    return {"total": resp.get("meta", {}).get("total", 0), "items": items}
+@mcp.tool()
+async def get_asset(asset_id: str) -> dict:
+    """读单个 asset 元数据 (不含 chapter content).
+    用于轮询 distill 完成状态. 完整内容用 get_asset_chapters 或读 resource.
+    """
+    # NOTE(review): docstring kept verbatim; FastMCP presumably publishes it
+    # as the tool description - confirm before editing.
+    record = await get_client().get(f"/api/v1/assets/{asset_id}")
+    # Required fields are copied first; metrics / kg_summary are optional.
+    summary = {field: record[field] for field in ("id", "type", "title", "status")}
+    summary["version"] = record["current_version"]
+    summary["approval_state"] = record["approval_state"]
+    summary["metrics"] = record.get("metrics")
+    summary["kg_summary"] = record.get("kg_summary")
+    summary["updated_at"] = record["updated_at"]
+    return summary
+@mcp.tool()
+async def get_asset_chapters(asset_id: str) -> list[dict]:
+    """读 asset 的所有 chapter (含 markdown content + layout).
+    对 knowledge_graph 类型, layout 含 nodes/edges/thumbnail_svg.
+    对其它 7 类, content 是 markdown 正文.
+    """
+    # NOTE(review): docstring kept verbatim; FastMCP presumably publishes it
+    # as the tool description - confirm before editing.
+    def _slim(chapter: dict) -> dict:
+        """Copy the chapter fields returned to the host."""
+        slim = {
+            "id": chapter["id"],
+            "title": chapter["title"],
+            "order_index": chapter["order_index"],
+            "content": chapter["content"],
+            "approval_state": chapter.get("approval_state", "pending"),
+        }
+        layout = chapter.get("layout")
+        # Only the structural nodes/edges of a layout are forwarded.
+        if layout and "nodes" in layout:
+            slim["kg_layout"] = {
+                "nodes": layout.get("nodes", []),
+                "edges": layout.get("edges", []),
+            }
+        return slim
+    chapters = await get_client().get(f"/api/v1/assets/{asset_id}/chapters")
+    return [_slim(chapter) for chapter in chapters]
+@mcp.tool()
+async def search_entity(
+    query: str,
+    project_id: str | None = None,
+    entity_type: Literal["person", "event", "concept", "artifact"] | None = None,
+    min_assets: int = 1,
+) -> list[dict]:
+    """跨文档实体搜索 (KG entity_registry).
+    例: search_entity('Alice') → 返回 Alice 出现在哪些 KG asset 里.
+    Args:
+        query: label / aliases 模糊匹配
+        project_id: 项目 id; 不传用 default
+        entity_type: 过滤 person/event/concept/artifact
+        min_assets: 仅返回出现在 ≥N 个 asset 的 (跨文档实体)
+    """
+    # NOTE(review): docstring kept verbatim; FastMCP presumably publishes it
+    # as the tool description - confirm before editing.
+    pid = _default_project_id(project_id)
+    query_params: dict = {"q": query, "min_assets": min_assets}
+    if entity_type:
+        query_params["type"] = entity_type
+    found = await get_client().get(
+        f"/api/v1/projects/{pid}/entities", params=query_params,
+    )
+    matches: list[dict] = []
+    for entity in found:
+        # id/type/label are required; the remaining fields have defaults.
+        row = {key: entity[key] for key in ("id", "type", "label")}
+        row["aliases"] = entity.get("aliases", [])
+        row["asset_count"] = entity.get("asset_count", 0)
+        row["source_refs"] = entity.get("source_refs", [])
+        matches.append(row)
+    return matches
+# ── resources ─────────────────────────────────────────────────────
+@mcp.resource("tokenknows://asset/{asset_id}")
+async def asset_resource(asset_id: str) -> str:
+    """以 markdown 形式读单个 asset (所有 chapter 拼接).
+    Host (Claude) 可通过 @-mention 直接引用: @tokenknows://asset/demo-kg-001
+    """
+    # NOTE(review): docstring kept verbatim; FastMCP presumably publishes it
+    # as the resource description - confirm before editing.
+    client = get_client()
+    asset = await client.get(f"/api/v1/assets/{asset_id}")
+    chapters = await client.get(f"/api/v1/assets/{asset_id}/chapters")
+    # Header: title, then an italic line with type / status / version.
+    parts = [
+        f"# {asset['title']}",
+        "",
+        f"_type={asset['type']} · status={asset['status']} · v{asset['current_version']}_",
+        "",
+    ]
+    for c in chapters:
+        # `or ""` also covers "content": null; a None entry would make the
+        # final str.join raise TypeError.
+        parts.extend([f"## {c['title']}", "", c.get("content") or "", ""])
+    return "\n".join(parts)
+# ── prompts ───────────────────────────────────────────────────────
+@mcp.prompt()
+def distill_session(document_type: str = "weekly_report") -> str:
+    """模板: 把当前 session 蒸馏成指定文档类型.
+    Args:
+        document_type: weekly_report / tech_design / adr / incident / book /
+                       agent_skill / knowledge_graph
+    """
+    # NOTE(review): the docstring is presumably published by FastMCP as the
+    # prompt description, so it is left as-is - confirm before rewording.
+    # The returned text is the prompt itself: a five-step recipe naming the
+    # tools to call, with document_type interpolated in two places.
+    return f"""请把我们这个 Claude session 的对话蒸馏成 **{document_type}** 类型文档:
+1. 用 `submit_session_events` 工具把本次对话的关键节点 (用户的需求 / 你的方案 /
+   关键代码变更 / 决策与权衡) 整理成 3-10 条 event 提交;
+2. 调 `distill_document(document_type='{document_type}')` 触发后端流水线;
+3. 用 `get_asset(asset_id)` 轮询 status (≤60s 应变 'draft');
+4. 完成后用 `get_asset_chapters` 拉 markdown 给我看;
+5. 如果是 knowledge_graph 类型, 用 `search_entity` 查关键人物/概念跨文档出现.
+"""
+# ── entry ─────────────────────────────────────────────────────────
+def main() -> None:
+    """CLI entry: python -m mcp_server."""
+    import argparse
+    cli = argparse.ArgumentParser(description="TokenKnows MCP server")
+    cli.add_argument(
+        "--transport",
+        default="stdio",
+        choices=["stdio", "sse"],
+        help="MCP transport (stdio for Claude Code/Cowork; sse for remote)",
+    )
+    cli.add_argument(
+        "--port",
+        default=8765,
+        type=int,
+        help="SSE 端口 (仅 transport=sse 时)",
+    )
+    options = cli.parse_args()
+    transport = options.transport
+    # The port setting is only applied for the SSE transport.
+    if transport == "sse":
+        mcp.settings.port = options.port
+    mcp.run(transport=transport)
+if __name__ == "__main__":
+    main()

mcp_server/tools/__init__.py ADDED Viewed

File without changes

tokenknows_mcp-0.2.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,80 @@
+Metadata-Version: 2.4
+Name: tokenknows-mcp
+Version: 0.2.1
+Summary: TokenKnows MCP server — distill AI coding sessions (Claude Code / Codex / Cursor) into weekly reports, ADRs and a knowledge graph on your self-hosted workbench
+Project-URL: Homepage, https://github.com/johnnywuj81/tokenknows
+Project-URL: Repository, https://github.com/johnnywuj81/tokenknows
+Project-URL: Issues, https://github.com/johnnywuj81/tokenknows/issues
+Author: johnnywuj81
+License-Expression: MIT
+Keywords: claude-code,knowledge-graph,knowledge-management,mcp,model-context-protocol,self-hosted
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development
+Requires-Python: >=3.11
+Requires-Dist: httpx>=0.27.0
+Requires-Dist: mcp>=1.2.0
+Description-Content-Type: text/markdown
+# TokenKnows MCP Server
+<!-- mcp-name: io.github.johnnywuj81/tokenknows -->
+MCP server for [TokenKnows](https://github.com/johnnywuj81/tokenknows) — a self-hosted engineering knowledge workbench that captures AI coding sessions (Claude Code / Codex / Cursor / VS Code) and distills them into structured documents: weekly reports, tech designs, ADRs, incident reviews, long-form books, agent skills and a knowledge graph, via a 5-stage LLM pipeline. Evidence-linked: every distilled claim links back to source session events.
+## Prerequisites
+This server is the bridge between your MCP host and a **self-hosted TokenKnows backend** (default `http://127.0.0.1:8001`). Deploy the backend first — see the [main repository](https://github.com/johnnywuj81/tokenknows). Local-first: your data goes only to the backend you configure.
+## Install & run
+```bash
+# Run directly (stdio, for Claude Code / Cowork / Cursor)
+uvx tokenknows-mcp
+# Or install then run
+pip install tokenknows-mcp
+tokenknows-mcp
+# SSE transport for remote / docker setups
+tokenknows-mcp --transport sse --port 8765
+```
+### Claude Code config example
+```json
+{
+  "mcpServers": {
+    "tokenknows": {
+      "command": "uvx",
+      "args": ["tokenknows-mcp"],
+      "env": { "TOKENKNOWS_API_BASE": "http://127.0.0.1:8001" }
+    }
+  }
+}
+```
+Tip: in Claude Code you can instead install the full plugin (MCP server + slash commands + skills): `/plugin marketplace add johnnywuj81/tokenknows` → `/plugin install tokenknows@tokenknows`.
+## Environment variables
+| Variable | Default | Description |
+|---|---|---|
+| `TOKENKNOWS_API_BASE` | `http://127.0.0.1:8001` | Self-hosted TokenKnows backend URL |
+| `TOKENKNOWS_API_TOKEN` | — | JWT bearer token (optional) |
+| `TOKENKNOWS_DEFAULT_PROJECT` | — | Default project_id for event submission |
+## Tools
+- `submit_session_events` — persist conversation turns into the knowledge base
+- `distill_document` — trigger the 5-stage pipeline (weekly_report / tech_design / adr / incident / book / agent_skill / knowledge_graph)
+- `list_assets` / `get_asset` / `get_asset_chapters` — read distilled output
+- `search_entity` — cross-document knowledge-graph entity search
+Plus `tokenknows://asset/{id}` resources and prompt templates for all 7 document types.
+## License
+[MIT](https://github.com/johnnywuj81/tokenknows/blob/main/LICENSE) — source of truth for this package lives in [`code/tokenknows-api/mcp_server`](https://github.com/johnnywuj81/tokenknows/tree/main/code/tokenknows-api/mcp_server).

tokenknows_mcp-0.2.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+mcp_server/__init__.py,sha256=gLZCqfNf5-hP9bVnMTF-nSY7a6tm10vsyHE_zBMQTj8,146
+mcp_server/__main__.py,sha256=hwPnEb6KNblf1zStE03O92_RfUFcv2w3F3Yddh8GPBY,110
+mcp_server/client.py,sha256=4Yr1zTZ_q5SK6MM9XU5BSwx8XX3Vlj-OtSb3oykQVW8,2736
+mcp_server/daemon.py,sha256=yaNkUlinboK4XCtWe_38EqTWmRocPZkMPrsZzENTNrA,11005
+mcp_server/server.py,sha256=2WGr1qsUDhsD4AfKSnDoT_wZsmthlqTqkJhss2z7Wk4,13760
+mcp_server/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mcp_server/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mcp_server/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tokenknows_mcp-0.2.1.dist-info/METADATA,sha256=-LeWL_rsh7ZrS3MAUHWaQqCXCovg5_-f1bGnivQAHGY,3456
+tokenknows_mcp-0.2.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+tokenknows_mcp-0.2.1.dist-info/entry_points.txt,sha256=dEYHp5rbVQ4QIzJpMIy0qKIuzBW8W7nPbtsk3HS1k-k,58
+tokenknows_mcp-0.2.1.dist-info/RECORD,,

tokenknows_mcp-0.2.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

tokenknows_mcp-0.2.1.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ tokenknows-mcp = mcp_server.server:main