vox-code 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. vox_code-2.0.0.dist-info/METADATA +258 -0
  2. vox_code-2.0.0.dist-info/RECORD +88 -0
  3. vox_code-2.0.0.dist-info/WHEEL +4 -0
  4. vox_code-2.0.0.dist-info/entry_points.txt +3 -0
  5. voxcli/__init__.py +3 -0
  6. voxcli/__main__.py +5 -0
  7. voxcli/agent/__init__.py +12 -0
  8. voxcli/agent/agent.py +449 -0
  9. voxcli/agent/agent_budget.py +133 -0
  10. voxcli/agent/agent_orchestrator.py +414 -0
  11. voxcli/agent/plan_execute_agent.py +514 -0
  12. voxcli/agent/roles.py +80 -0
  13. voxcli/agent/sub_agent.py +351 -0
  14. voxcli/catalog.py +477 -0
  15. voxcli/chat.py +91 -0
  16. voxcli/cli/__init__.py +4 -0
  17. voxcli/cli/main.py +452 -0
  18. voxcli/cli/parser.py +71 -0
  19. voxcli/config.py +518 -0
  20. voxcli/gui/__main__.py +3 -0
  21. voxcli/gui/main.py +22 -0
  22. voxcli/gui/pet/__init__.py +5 -0
  23. voxcli/gui/pet/base.py +62 -0
  24. voxcli/gui/pet/coordinator.py +888 -0
  25. voxcli/gui/pet/data.py +430 -0
  26. voxcli/gui/pet/widgets.py +683 -0
  27. voxcli/gui/pet/windows.py +2298 -0
  28. voxcli/gui/pet/workers.py +54 -0
  29. voxcli/gui/pet_app.py +7 -0
  30. voxcli/hitl/__init__.py +11 -0
  31. voxcli/hitl/handler.py +11 -0
  32. voxcli/hitl/policy.py +32 -0
  33. voxcli/hitl/request.py +13 -0
  34. voxcli/hitl/result.py +11 -0
  35. voxcli/hitl/terminal_handler.py +64 -0
  36. voxcli/hitl/tool_registry.py +64 -0
  37. voxcli/llm/base.py +93 -0
  38. voxcli/llm/factory.py +178 -0
  39. voxcli/llm/ollama_client.py +137 -0
  40. voxcli/llm/openai_compatible.py +249 -0
  41. voxcli/memory/base.py +16 -0
  42. voxcli/memory/budget.py +53 -0
  43. voxcli/memory/compressor.py +198 -0
  44. voxcli/memory/entry.py +36 -0
  45. voxcli/memory/long_term.py +126 -0
  46. voxcli/memory/manager.py +101 -0
  47. voxcli/memory/retriever.py +72 -0
  48. voxcli/memory/short_term.py +84 -0
  49. voxcli/memory/tokenizer.py +21 -0
  50. voxcli/plan/__init__.py +5 -0
  51. voxcli/plan/execution_plan.py +225 -0
  52. voxcli/plan/planner.py +198 -0
  53. voxcli/plan/task.py +123 -0
  54. voxcli/policy/audit_log.py +111 -0
  55. voxcli/policy/command_guard.py +34 -0
  56. voxcli/policy/exception.py +5 -0
  57. voxcli/policy/path_guard.py +32 -0
  58. voxcli/prompting/__init__.py +7 -0
  59. voxcli/prompting/presenter.py +154 -0
  60. voxcli/rag/__init__.py +16 -0
  61. voxcli/rag/analyzer.py +89 -0
  62. voxcli/rag/chunk.py +17 -0
  63. voxcli/rag/chunker.py +137 -0
  64. voxcli/rag/embedding.py +75 -0
  65. voxcli/rag/formatter.py +40 -0
  66. voxcli/rag/index.py +96 -0
  67. voxcli/rag/relation.py +14 -0
  68. voxcli/rag/retriever.py +58 -0
  69. voxcli/rag/store.py +155 -0
  70. voxcli/rag/tokenizer.py +26 -0
  71. voxcli/runtime/__init__.py +6 -0
  72. voxcli/runtime/session_controller.py +386 -0
  73. voxcli/tool/__init__.py +3 -0
  74. voxcli/tool/tool_registry.py +433 -0
  75. voxcli/util/animation.py +219 -0
  76. voxcli/util/ansi.py +82 -0
  77. voxcli/util/markdown.py +98 -0
  78. voxcli/web/__init__.py +17 -0
  79. voxcli/web/base.py +20 -0
  80. voxcli/web/extractor.py +77 -0
  81. voxcli/web/factory.py +38 -0
  82. voxcli/web/fetch_result.py +27 -0
  83. voxcli/web/fetcher.py +42 -0
  84. voxcli/web/network_policy.py +49 -0
  85. voxcli/web/result.py +23 -0
  86. voxcli/web/searxng.py +55 -0
  87. voxcli/web/serpapi.py +53 -0
  88. voxcli/web/zhipu.py +55 -0
@@ -0,0 +1,249 @@
1
+ """OpenAI 兼容 API 客户端(支持 SSE 流式)"""
2
+
3
+ import base64
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ import httpx
10
+
11
+ from ..chat import SUPPORTED_IMAGE_MIME_TYPES
12
+ from .base import (
13
+ LlmClient, Message, ChatResponse, ToolCall, ToolDef,
14
+ StreamListener, STREAM_LISTENER_NOOP,
15
+ )
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class OpenAiCompatibleClient(LlmClient):
    """Chat client for OpenAI-compatible chat-completions endpoints (SSE streaming).

    The request body is POSTed to ``base_url`` as-is (no path is appended), with
    ``"stream": True``. The response is consumed as server-sent events and the
    deltas are forwarded to a ``StreamListener`` while they arrive.
    """

    def __init__(self, api_key: str, model: str, base_url: str, provider_name: str,
                 timeout: float = 300.0):
        """Create the client and its underlying ``httpx.Client``.

        Args:
            api_key: Sent as a ``Bearer`` token in the ``Authorization`` header.
            model: Model identifier placed in every request body.
            base_url: Full URL the chat request is POSTed to.
            provider_name: Provider label; used in error messages and to decide
                whether ``reasoning_content`` is echoed back in requests.
            timeout: Default httpx timeout in seconds (connect is fixed at 60s).
        """
        self._api_key = api_key
        self._model = model
        self._base_url = base_url
        self._provider_name = provider_name
        # Use certifi if available for better SSL compat (macOS system Python)
        try:
            import certifi
            verify = certifi.where()
        except ImportError:
            verify = True
        self._http = httpx.Client(timeout=httpx.Timeout(timeout, connect=60.0), verify=verify)

    @property
    def model_name(self) -> str:
        """Model identifier this client was configured with."""
        return self._model

    @property
    def provider_name(self) -> str:
        """Provider label this client was configured with."""
        return self._provider_name

    @property
    def supports_image_inputs(self) -> bool:
        """Always ``True``: attachments are encoded as ``image_url`` blocks."""
        return True

    def _encode_image_attachment(self, attachment) -> dict:
        """Turn an image attachment into an ``image_url`` content block (data URL).

        Raises:
            ValueError: Unsupported MIME type, or the path is not a regular file.
            FileNotFoundError: The file does not exist.
            RuntimeError: The file could not be read or encoded.
        """
        file_path = Path(attachment.file_path)
        if attachment.mime_type not in SUPPORTED_IMAGE_MIME_TYPES:
            raise ValueError(
                f"仅支持 png/jpg/jpeg/webp 图片,当前文件不受支持: {attachment.display_name}"
            )
        if not file_path.exists():
            raise FileNotFoundError(f"图片不存在: {file_path}")
        if not file_path.is_file():
            raise ValueError(f"不是有效的图片文件: {file_path}")
        try:
            data = file_path.read_bytes()
        except OSError as exc:
            raise RuntimeError(f"读取图片失败: {attachment.display_name}: {exc}") from exc
        try:
            encoded = base64.b64encode(data).decode("ascii")
        except Exception as exc:
            raise RuntimeError(f"图片编码失败: {attachment.display_name}: {exc}") from exc
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:{attachment.mime_type};base64,{encoded}",
            },
        }

    def _build_message_content(self, message: Message):
        """Return the ``content`` value for one message.

        Messages without attachments keep their plain ``content`` (possibly
        ``None``). Messages with attachments become a list of content blocks:
        an optional text block followed by one image block per attachment.
        """
        if not message.attachments:
            return message.content

        blocks: list[dict] = []
        text = message.content or ""
        if text:
            blocks.append({"type": "text", "text": text})
        blocks.extend(
            self._encode_image_attachment(attachment)
            for attachment in message.attachments
        )
        return blocks

    def _build_request(self, messages: list[Message],
                       tools: Optional[list[ToolDef]] = None) -> dict:
        """Build the JSON-serialisable request body for a streaming chat call."""
        body = {
            "model": self._model,
            "stream": True,
            "messages": [],
        }
        # reasoning_content is not echoed back to these providers.
        allow_reasoning_content = self._provider_name not in {"deepseek", "qwen"}
        for msg in messages:
            m: dict = {"role": msg.role}
            content = self._build_message_content(msg)
            if content is not None:
                m["content"] = content
            if allow_reasoning_content and msg.reasoning_content:
                m["reasoning_content"] = msg.reasoning_content
            if msg.tool_calls:
                m["tool_calls"] = [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {"name": tc.name, "arguments": tc.arguments},
                    }
                    for tc in msg.tool_calls
                ]
            if msg.tool_call_id:
                m["tool_call_id"] = msg.tool_call_id
            # Assistant tool-call messages and tool results always carry a
            # "content" key, even when there is no text.
            needs_content = msg.role == "tool" or (msg.role == "assistant" and msg.tool_calls)
            if needs_content and "content" not in m:
                m["content"] = ""
            body["messages"].append(m)

        if tools:
            body["tools"] = [
                {
                    "type": "function",
                    "function": {
                        "name": t.name,
                        "description": t.description,
                        "parameters": t.parameters,
                    },
                }
                for t in tools
            ]
        return body

    @staticmethod
    def _merge_tool_calls(acc: dict[int, dict], tool_calls_delta: list[dict]):
        """Fold one ``delta.tool_calls`` list into the per-index accumulator.

        ``acc`` maps a tool-call index to ``{"id", "name", "arguments"}``;
        argument fragments are concatenated in arrival order.
        """
        for tc in tool_calls_delta:
            idx = tc.get("index")
            if idx is None:
                if acc and not tc.get("id"):
                    # No index and no id: this cannot start a new call (calls
                    # without an id are discarded later), so treat it as a
                    # continuation of the most recently started call.
                    idx = next(reversed(acc))
                else:
                    idx = max(acc, default=-1) + 1
            entry = acc.setdefault(idx, {"id": "", "name": "", "arguments": ""})
            if tc.get("id"):
                entry["id"] = tc["id"]
            fn = tc.get("function") or {}  # tolerate an explicit null
            if fn.get("name"):
                entry["name"] = fn["name"]
            if fn.get("arguments"):
                entry["arguments"] += fn["arguments"]

    @staticmethod
    def _build_tool_calls(acc: dict[int, dict]) -> list[ToolCall]:
        """Convert the accumulator into ``ToolCall`` objects, ordered by index.

        Entries that never received an ``id`` are dropped.
        """
        return [
            ToolCall(id=entry["id"], name=entry["name"], arguments=entry["arguments"])
            for _, entry in sorted(acc.items())
            if entry["id"]
        ]

    @staticmethod
    def _strip_invalid_unicode(obj):
        """Recursively drop lone surrogates from strings in dict values and lists.

        Lone surrogates cannot be encoded as UTF-8; they are removed so the
        request body can always be serialised. Dict keys are left untouched.
        """
        if isinstance(obj, str):
            return obj.encode("utf-8", "surrogatepass").decode("utf-8", "ignore")
        if isinstance(obj, dict):
            return {k: OpenAiCompatibleClient._strip_invalid_unicode(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [OpenAiCompatibleClient._strip_invalid_unicode(i) for i in obj]
        return obj

    def chat(self, messages: list[Message], tools: Optional[list[ToolDef]] = None,
             listener: StreamListener = STREAM_LISTENER_NOOP) -> ChatResponse:
        """Send a chat request and consume the SSE response incrementally.

        Args:
            messages: Conversation to send.
            tools: Optional tool definitions exposed to the model.
            listener: Receives ``on_reasoning_delta`` / ``on_content_delta``
                callbacks as each chunk arrives.

        Returns:
            The assembled assistant response. Token counts are 0 when the
            server sends no ``usage`` object.

        Raises:
            RuntimeError: Non-2xx response that has a body (body is included).
            httpx.HTTPStatusError: Non-2xx response with an empty body.
        """
        body = self._strip_invalid_unicode(self._build_request(messages, tools))
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
            "Accept": "text/event-stream",
        }
        json_data = json.dumps(body, ensure_ascii=False).encode("utf-8", "ignore")

        content_parts: list[str] = []
        reasoning_parts: list[str] = []
        tool_calls_acc: dict[int, dict] = {}
        input_tokens = output_tokens = 0

        # stream() hands back the response before the body is read, so the
        # listener sees deltas while the model is still generating. A plain
        # post() would buffer the entire body first.
        with self._http.stream("POST", self._base_url, content=json_data,
                               headers=headers) as response:
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                response.read()  # a streamed body must be read before .text
                detail = response.text.strip()
                if detail:
                    raise RuntimeError(
                        f"{self._provider_name} 接口报错 {response.status_code}: {detail}"
                    ) from exc
                raise

            for raw_line in response.iter_lines():
                line = raw_line.strip()
                if not line.startswith("data:"):
                    continue
                payload = line[5:].strip()
                if not payload:
                    # An empty data field is not an end-of-stream marker.
                    continue
                if payload == "[DONE]":
                    break

                try:
                    chunk = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                if not isinstance(chunk, dict):
                    continue

                usage = chunk.get("usage")
                if usage:
                    # "or" keeps the previous value when the field is null.
                    input_tokens = usage.get("prompt_tokens") or input_tokens
                    output_tokens = usage.get("completion_tokens") or output_tokens

                choices = chunk.get("choices") or []
                if not choices:
                    continue
                delta = choices[0].get("delta") or {}

                rdelta = delta.get("reasoning_content")
                if rdelta:
                    reasoning_parts.append(rdelta)
                    listener.on_reasoning_delta(rdelta)

                cdelta = delta.get("content")
                if cdelta:
                    content_parts.append(cdelta)
                    listener.on_content_delta(cdelta)

                tool_calls_delta = delta.get("tool_calls")
                if tool_calls_delta:
                    self._merge_tool_calls(tool_calls_acc, tool_calls_delta)

        tool_calls = self._build_tool_calls(tool_calls_acc)

        return ChatResponse(
            role="assistant",
            content="".join(content_parts) or None,
            reasoning_content="".join(reasoning_parts) or None,
            tool_calls=tool_calls or None,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
        )
voxcli/memory/base.py ADDED
@@ -0,0 +1,16 @@
1
+ """记忆接口"""
2
+
3
+ from typing import List, Optional, Protocol
4
+
5
+ from .entry import MemoryEntry
6
+
7
+
8
class Memory(Protocol):
    """Structural interface for a store of ``MemoryEntry`` objects.

    Only the method signatures are defined here; behaviour such as ordering,
    de-duplication or persistence is left to each implementation.
    """

    # Add an entry to the store.
    def store(self, entry: MemoryEntry): ...
    # Look up one entry by id; ``None`` is an allowed result.
    def retrieve(self, id: str) -> Optional[MemoryEntry]: ...
    # Return entries related to ``query``; ``limit`` presumably caps the
    # number of results (confirm against each implementation).
    def search(self, query: str, limit: int) -> List[MemoryEntry]: ...
    # Return every entry held by the store.
    def get_all(self) -> List[MemoryEntry]: ...
    # Remove the entry with this id; presumably ``True`` when something was
    # removed (confirm against each implementation).
    def delete(self, id: str) -> bool: ...
    # Remove all entries.
    def clear(self): ...
    # Token total for the store, as an integer.
    def token_count(self) -> int: ...
    # Number of entries in the store.
    def size(self) -> int: ...
@@ -0,0 +1,53 @@
1
+ """Token 预算管理器"""
2
+
3
+ from typing import List
4
+
5
+ from .entry import estimate_tokens
6
+ from .short_term import ConversationMemory
7
+ from ..llm.base import Message
8
+
9
+
10
class TokenBudget:
    """Tracks the context-window budget and cumulative token usage.

    A fixed number of tokens is reserved for the system prompt, the tool
    definitions and the model's response; the remainder of the context window
    is what the conversation may use.
    """

    def __init__(self, context_window: int = 200000,
                 reserved_for_system: int = 500,
                 reserved_for_tools: int = 800,
                 reserved_for_response: int = 2000):
        # Usage counters start at zero and only grow through record_usage().
        self._call_count = 0
        self._total_output = 0
        self._total_input = 0
        self._reserved_for_response = reserved_for_response
        self._reserved_for_tools = reserved_for_tools
        self._reserved_for_system = reserved_for_system
        self._context_window = context_window

    @property
    def available_for_conversation(self) -> int:
        """Context window minus the three reserved portions."""
        remaining = self._context_window
        for reserved in (self._reserved_for_system,
                         self._reserved_for_tools,
                         self._reserved_for_response):
            remaining -= reserved
        return remaining

    def needs_compression(self, memory: ConversationMemory) -> bool:
        """Return True once ``memory`` has used at least 80% of its budget.

        The budget is the smaller of the memory's own ``max_tokens`` and
        ``available_for_conversation``.
        """
        budget = min(memory.max_tokens, self.available_for_conversation)
        used = memory.token_count()
        return used >= budget * 0.8

    def record_usage(self, input_tokens: int, output_tokens: int):
        """Add one LLM call's token usage to the running totals."""
        self._call_count += 1
        self._total_input += input_tokens
        self._total_output += output_tokens

    @property
    def usage_report(self) -> str:
        """One-line, human-readable usage summary."""
        if self._call_count > 0:
            avg = self._total_input / self._call_count
        else:
            avg = 0
        head = f"Token 统计: 调用 {self._call_count} 次 | "
        totals = f"总输入: {self._total_input} | 总输出: {self._total_output} | "
        tail = f"平均输入: {avg:.0f} | 预算: {self._context_window}"
        return head + totals + tail

    @staticmethod
    def estimate_messages_tokens(messages: List[Message]) -> int:
        """Estimate the token cost of a message list.

        Counts each message's content and tool-call arguments, plus a flat
        4 tokens per message.
        """
        running = 0
        for message in messages:
            if message.content:
                running += estimate_tokens(message.content)
            for call in message.tool_calls or ():
                running += estimate_tokens(call.arguments)
        return running + len(messages) * 4
@@ -0,0 +1,198 @@
1
+ """上下文压缩器"""
2
+
3
+ import logging
4
+ import uuid
5
+ from typing import List, Optional
6
+
7
+ from .entry import MemoryEntry, MemoryType, estimate_tokens
8
+ from .short_term import ConversationMemory
9
+ from .long_term import LongTermMemory
10
+ from ..llm.base import LlmClient, Message
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Prompt for summarising one chunk of conversation. The single %s placeholder
# is filled with the chunk's text via the % operator.
_MAP_PROMPT = """请将以下对话片段压缩成一段简洁的摘要,保留关键信息:
- 用户的需求和意图
- 已执行的操作和结果
- 做出的决策和结论
- 重要的技术细节

对话片段:
%s

请用中文输出摘要,控制在200字以内。"""

# Prompt for merging several chunk summaries into one. %s receives the
# summaries joined together.
_REDUCE_PROMPT = """请将以下多个摘要合并成一个整体摘要,保留所有关键信息。

各片段摘要:
%s

请用中文输出合并摘要,控制在300字以内。"""

# Prompt asking the model for durable facts, one per line. %s receives the
# conversation text.
_EXTRACT_FACTS_PROMPT = """请从以下对话中提取"跨会话仍然成立、未来复用仍有价值"的稳定事实,格式为每行一条:
- 用户偏好和习惯
- 项目信息(名称、路径、技术栈)
- 重要决策和约定

只保留用户明确说明、或工具/代码库可验证的信息。
绝对不要提取以下内容:
- 当前这一轮让你执行的临时任务、步骤、todo
- 一次性的文件名、目录名、输出要求
- 模型自己的猜测、纠错、提醒、推断
- "用户想要/需要/让我/请你..." 这类请求句

对话内容:
%s

请每行一条事实,不要多余解释。"""

# A candidate fact that starts with any of these prefixes is rejected.
_EPHEMERAL_PREFIXES = ["用户想", "用户要", "用户需要", "用户请求", "帮我", "让我",
                       "新建", "创建", "删除", "修改", "生成", "补充要求", "当前这一轮", "本次任务"]
# A candidate fact that contains any of these words anywhere is rejected.
_SPECULATION_CUES = ["可能", "应该", "猜测", "推测", "笔误", "提醒"]
# A candidate fact without a colon is kept only if it contains one of these.
_DURABLE_HINTS = ["用户偏好", "用户习惯", "喜欢", "倾向", "项目", "仓库", "路径", "技术栈",
                  "版本", "模型", "接口", "配置", "环境变量", "命令", "约定", "规则", "默认"]
54
+
55
+
56
class ContextCompressor:
    """Shrinks conversation memory with LLM summaries and extracts durable facts.

    Compression is map/reduce: older entries are summarised in chunks, the
    chunk summaries are merged into one, and the memory is rebuilt as that
    summary followed by the most recent entries.
    """

    def __init__(self, llm_client: LlmClient, retain_recent_rounds: int = 3):
        """
        Args:
            llm_client: Client used for all summarisation and extraction calls.
            retain_recent_rounds: Number of most recent memory entries that are
                kept verbatim by ``compress``.
        """
        self._llm = llm_client
        self._retain = retain_recent_rounds

    def set_llm_client(self, llm_client: LlmClient):
        """Replace the LLM client used for later calls."""
        self._llm = llm_client

    def compress(self, memory: ConversationMemory) -> Optional[str]:
        """Replace all but the newest entries of ``memory`` with one summary entry.

        Returns:
            The summary text, or ``None`` when the memory holds no more entries
            than the retain count (nothing is changed in that case).
        """
        all_entries = memory.get_all()
        if len(all_entries) <= self._retain:
            return None

        split = len(all_entries) - self._retain
        old = list(all_entries[:split])
        recent = list(all_entries[split:])

        chunk_summaries = self._map_phase(old)
        if not chunk_summaries:
            return None

        final_summary = (chunk_summaries[0] if len(chunk_summaries) == 1
                         else self._reduce_phase(chunk_summaries))

        # Rebuild the memory: summary first, then the retained recent entries.
        memory.clear()
        summary_entry = MemoryEntry(
            id=f"summary-{uuid.uuid4().hex[:8]}",
            content=f"[历史对话摘要] {final_summary}",
            type=MemoryType.SUMMARY,
        )
        memory.store(summary_entry)
        for entry in recent:
            memory.store(entry)

        return final_summary

    def extract_facts(self, entries: List[MemoryEntry],
                      long_term: LongTermMemory) -> List[str]:
        """Ask the LLM for durable facts and store the accepted ones in ``long_term``.

        Best-effort: any failure is logged and an empty list is returned.

        Returns:
            The facts that passed ``_is_persistent_fact``, in response order.
        """
        if not entries:
            return []

        conversation = "\n".join(
            f"{self._resolve_source(e)}({e.type.value}): {e.content}"
            for e in entries
        )

        try:
            prompt = _EXTRACT_FACTS_PROMPT % conversation
            resp = self._llm.chat([
                Message.system("你是一个信息提取助手,只输出关键事实,不输出其他内容。"),
                Message.user(prompt),
            ])
            facts_text = resp.content or ""

            facts = []
            for line in facts_text.split("\n"):
                fact = self._normalize_fact(line)
                if not self._is_persistent_fact(fact):
                    continue
                facts.append(fact)
                long_term.store(MemoryEntry(
                    id=f"fact-{uuid.uuid4().hex[:8]}",
                    content=fact,
                    type=MemoryType.FACT,
                    metadata={"source": "fact_extractor"},
                ))
            return facts
        except Exception as e:
            logger.warning("事实提取失败: %s", e)
            return []

    def _map_phase(self, entries: List[MemoryEntry]) -> List[str]:
        """Summarise ``entries`` in chunks of five; returns one summary per chunk.

        When the LLM call fails or returns no text, the chunk's own text
        (truncated to 200 characters) is used, so compression never replaces
        history with an empty summary.
        """
        summaries = []
        chunk_size = 5
        for i in range(0, len(entries), chunk_size):
            chunk = entries[i:i + chunk_size]
            chunk_text = "\n".join(
                f"{e.type.value}: {e.content}" for e in chunk
            )
            try:
                prompt = _MAP_PROMPT % chunk_text
                resp = self._llm.chat([
                    Message.system("你是一个对话摘要助手。"),
                    Message.user(prompt),
                ])
                summary = resp.content or ""
            except Exception as e:
                logger.warning("摘要生成失败: %s", e)
                summary = ""
            if not summary.strip():
                summary = f"[压缩] {chunk_text[:200]}"
            summaries.append(summary)
        return summaries

    def _reduce_phase(self, summaries: List[str]) -> str:
        """Merge chunk summaries with one LLM call.

        Falls back to joining the summaries with ``;`` when the call fails or
        returns no text.
        """
        joined = "\n\n---\n\n".join(summaries)
        try:
            prompt = _REDUCE_PROMPT % joined
            resp = self._llm.chat([
                Message.system("你是一个摘要合并助手。"),
                Message.user(prompt),
            ])
            return resp.content or ";".join(summaries)
        except Exception as e:
            logger.warning("摘要合并失败: %s", e)
            return ";".join(summaries)

    @staticmethod
    def _resolve_source(entry: MemoryEntry) -> str:
        """Return the speaker label for ``entry``.

        Uses ``metadata["source"]`` when set, otherwise derives it from the id
        prefix (``user-`` / ``assistant-`` / ``tool-``), else ``"unknown"``.
        """
        src = entry.metadata.get("source", "")
        if src:
            return src
        for role in ("user", "assistant", "tool"):
            if entry.id.startswith(f"{role}-"):
                return role
        return "unknown"

    @staticmethod
    def _normalize_fact(line: str) -> str:
        """Trim whitespace and one leading list bullet (``- ``, ``• `` or ``* ``)."""
        fact = (line or "").strip()
        for bullet in ("- ", "• ", "* "):
            if fact.startswith(bullet):
                fact = fact[len(bullet):]
                break
        return fact.strip()

    @staticmethod
    def _is_persistent_fact(fact: str) -> bool:
        """Decide whether ``fact`` is worth keeping in long-term memory.

        Rejects facts of five characters or fewer, facts starting with an
        ephemeral-request prefix, and facts containing a speculation cue. Of
        the rest, keeps facts containing a colon (ASCII or full-width) or a
        durable-hint keyword.
        """
        if not fact or len(fact) <= 5:
            return False
        normalized = fact.lower()
        if normalized.startswith(tuple(p.lower() for p in _EPHEMERAL_PREFIXES)):
            return False
        if any(cue in normalized for cue in _SPECULATION_CUES):
            return False
        # "\uff1a" is the full-width colon; written as an escape so it cannot
        # be silently normalised into the ASCII one.
        if ":" in fact or "\uff1a" in fact:
            return True
        return any(hint in normalized for hint in _DURABLE_HINTS)
voxcli/memory/entry.py ADDED
@@ -0,0 +1,36 @@
1
+ """记忆条目"""
2
+
3
+ import math
4
+ import time
5
+ from dataclasses import dataclass, field
6
+ from enum import Enum
7
+ from typing import Optional
8
+
9
+
10
class MemoryType(Enum):
    """Category of a memory entry; each value is the string form of its type."""

    CONVERSATION = "conversation"
    FACT = "fact"
    SUMMARY = "summary"
    TOOL_RESULT = "tool_result"
15
+
16
+
17
@dataclass
class MemoryEntry:
    """One stored piece of memory: its text, category, and bookkeeping data.

    ``metadata`` defaults to a fresh empty dict and ``timestamp`` to the
    creation time. When no positive ``token_count`` is supplied and the content
    is non-empty, the count is estimated from the content.
    """

    id: str
    content: str
    type: MemoryType
    metadata: dict = field(default_factory=dict)
    timestamp: float = field(default_factory=time.time)
    token_count: int = 0

    def __post_init__(self):
        # A caller-supplied positive count wins; empty content stays at its
        # given count.
        if self.token_count > 0 or not self.content:
            return
        self.token_count = estimate_tokens(self.content)
29
+
30
+
31
def estimate_tokens(text: Optional[str]) -> int:
    """Roughly estimate the token count of ``text``.

    Characters in U+4E00..U+9FFF are costed at 1.5 characters per token and
    every other character at 4 per token; the total is rounded up. Empty or
    missing text costs 0.
    """
    if text:
        cjk_chars = 0
        for ch in text:
            if "\u4e00" <= ch <= "\u9fff":
                cjk_chars += 1
        remaining = len(text) - cjk_chars
        return math.ceil(cjk_chars / 1.5 + remaining / 4.0)
    return 0
@@ -0,0 +1,126 @@
1
+ """长期记忆 - 跨对话持久化的关键信息"""
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import List, Optional
7
+
8
+ from .base import Memory
9
+ from .entry import MemoryEntry, MemoryType
10
+ from .tokenizer import matches, tokenize
11
+
12
+
13
+ _STORAGE_FILE = "long_term_memory.json"
14
+
15
+
16
+ def _storage_dir() -> Path:
17
+ env_dir = os.environ.get("VOX_CODE_MEMORY_DIR", "")
18
+ if env_dir.strip():
19
+ return Path(env_dir.strip())
20
+ return Path.home() / ".vox-code" / "memory"
21
+
22
+
23
class LongTermMemory(Memory):
    """Memory store that persists its entries to a JSON file across sessions.

    Entries are held in a dict keyed by id and the whole set is rewritten to
    disk after every mutation. The file is loaded once, on construction.
    """

    def __init__(self):
        self._entries: dict[str, MemoryEntry] = {}
        self._token_counter = 0
        self._storage_file = _storage_dir() / _STORAGE_FILE
        self._load_from_disk()

    def store(self, entry: MemoryEntry):
        """Add ``entry`` and persist, unless an entry with equal content exists.

        Storing under an id that is already present replaces the old entry;
        the old entry's tokens are removed from the running total first.
        """
        if any(e.content == entry.content for e in self._entries.values()):
            return
        self._put(entry)
        self._save_to_disk()

    def retrieve(self, id: str) -> Optional[MemoryEntry]:
        """Return the entry with this id, or ``None``."""
        return self._entries.get(id)

    def search(self, query: str, limit: int) -> List[MemoryEntry]:
        """Return up to ``limit`` entries whose content or metadata matches.

        Entries are scanned in insertion order. A non-positive ``limit``
        yields an empty list.
        """
        if limit <= 0:
            return []
        query_tokens = tokenize(query)
        results = []
        for entry in self._entries.values():
            hit = matches(entry.content, query_tokens) or any(
                matches(v, query_tokens) for v in entry.metadata.values()
            )
            if not hit:
                continue
            results.append(entry)
            if len(results) >= limit:
                break
        return results

    def get_all(self) -> List[MemoryEntry]:
        """Return all entries as a new list."""
        return list(self._entries.values())

    def delete(self, id: str) -> bool:
        """Remove the entry with this id and persist; ``False`` if it was absent."""
        entry = self._entries.pop(id, None)
        if entry is None:
            return False
        self._token_counter -= entry.token_count
        self._save_to_disk()
        return True

    def clear(self):
        """Remove every entry and persist the empty store."""
        self._entries.clear()
        self._token_counter = 0
        self._save_to_disk()

    def token_count(self) -> int:
        """Sum of ``token_count`` over all stored entries."""
        return self._token_counter

    def size(self) -> int:
        """Number of stored entries."""
        return len(self._entries)

    def get_by_type(self, type_: MemoryType) -> List[MemoryEntry]:
        """Return the entries whose type equals ``type_``."""
        return [e for e in self._entries.values() if e.type == type_]

    def status_summary(self) -> str:
        """One-line summary: entry count, token total and per-type counts."""
        type_counts = {}
        for e in self._entries.values():
            type_counts[e.type] = type_counts.get(e.type, 0) + 1
        return (f"长期记忆: {self.size()}条 / {self._token_counter} tokens "
                f"(事实: {type_counts.get(MemoryType.FACT, 0)}, "
                f"摘要: {type_counts.get(MemoryType.SUMMARY, 0)}, "
                f"工具结果: {type_counts.get(MemoryType.TOOL_RESULT, 0)})")

    def _put(self, entry: MemoryEntry):
        """Insert or replace ``entry`` in memory, keeping the token total exact."""
        previous = self._entries.get(entry.id)
        if previous is not None:
            self._token_counter -= previous.token_count
        self._entries[entry.id] = entry
        self._token_counter += entry.token_count

    def _save_to_disk(self):
        """Write all entries to the storage file; failures are reported, not raised."""
        try:
            data = [
                {
                    "id": e.id, "content": e.content,
                    "type": e.type.value,
                    "timestamp": e.timestamp,
                    "metadata": e.metadata,
                    "tokenCount": e.token_count,
                }
                for e in self._entries.values()
            ]
            payload = json.dumps(data, ensure_ascii=False, indent=2)
            self._storage_file.parent.mkdir(parents=True, exist_ok=True)
            # Write to a sibling temp file and rename over the target, so an
            # interrupted write cannot leave a truncated memory file behind.
            tmp_file = self._storage_file.with_name(self._storage_file.name + ".tmp")
            tmp_file.write_text(payload, encoding="utf-8")
            os.replace(tmp_file, self._storage_file)
        except (OSError, TypeError, ValueError) as e:
            # TypeError/ValueError: metadata that json cannot serialise.
            print(f"⚠️ 长期记忆持久化失败: {e}")

    def _load_from_disk(self):
        """Load entries from the storage file, if it exists.

        Records that are malformed are skipped individually so that one bad
        record does not discard the rest. Any other failure is reported and
        leaves whatever was loaded so far.
        """
        if not self._storage_file.exists():
            return
        try:
            data = json.loads(self._storage_file.read_text(encoding="utf-8"))
            skipped = 0
            for item in data:
                try:
                    # MemoryEntry estimates token_count itself when the stored
                    # value is missing or not positive.
                    entry = MemoryEntry(
                        id=item["id"],
                        content=item["content"],
                        type=MemoryType(item["type"]),
                        metadata=item.get("metadata", {}),
                        timestamp=item.get("timestamp", 0),
                        token_count=item.get("tokenCount", 0),
                    )
                except (KeyError, TypeError, ValueError, AttributeError):
                    skipped += 1
                    continue
                self._put(entry)
            print(f"📂 加载了 {len(self._entries)} 条长期记忆")
            if skipped:
                print(f"⚠️ 跳过了 {skipped} 条无法解析的长期记忆")
        except Exception as e:
            print(f"⚠️ 加载长期记忆失败: {e}")