vox-code 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vox_code-2.0.0.dist-info/METADATA +258 -0
- vox_code-2.0.0.dist-info/RECORD +88 -0
- vox_code-2.0.0.dist-info/WHEEL +4 -0
- vox_code-2.0.0.dist-info/entry_points.txt +3 -0
- voxcli/__init__.py +3 -0
- voxcli/__main__.py +5 -0
- voxcli/agent/__init__.py +12 -0
- voxcli/agent/agent.py +449 -0
- voxcli/agent/agent_budget.py +133 -0
- voxcli/agent/agent_orchestrator.py +414 -0
- voxcli/agent/plan_execute_agent.py +514 -0
- voxcli/agent/roles.py +80 -0
- voxcli/agent/sub_agent.py +351 -0
- voxcli/catalog.py +477 -0
- voxcli/chat.py +91 -0
- voxcli/cli/__init__.py +4 -0
- voxcli/cli/main.py +452 -0
- voxcli/cli/parser.py +71 -0
- voxcli/config.py +518 -0
- voxcli/gui/__main__.py +3 -0
- voxcli/gui/main.py +22 -0
- voxcli/gui/pet/__init__.py +5 -0
- voxcli/gui/pet/base.py +62 -0
- voxcli/gui/pet/coordinator.py +888 -0
- voxcli/gui/pet/data.py +430 -0
- voxcli/gui/pet/widgets.py +683 -0
- voxcli/gui/pet/windows.py +2298 -0
- voxcli/gui/pet/workers.py +54 -0
- voxcli/gui/pet_app.py +7 -0
- voxcli/hitl/__init__.py +11 -0
- voxcli/hitl/handler.py +11 -0
- voxcli/hitl/policy.py +32 -0
- voxcli/hitl/request.py +13 -0
- voxcli/hitl/result.py +11 -0
- voxcli/hitl/terminal_handler.py +64 -0
- voxcli/hitl/tool_registry.py +64 -0
- voxcli/llm/base.py +93 -0
- voxcli/llm/factory.py +178 -0
- voxcli/llm/ollama_client.py +137 -0
- voxcli/llm/openai_compatible.py +249 -0
- voxcli/memory/base.py +16 -0
- voxcli/memory/budget.py +53 -0
- voxcli/memory/compressor.py +198 -0
- voxcli/memory/entry.py +36 -0
- voxcli/memory/long_term.py +126 -0
- voxcli/memory/manager.py +101 -0
- voxcli/memory/retriever.py +72 -0
- voxcli/memory/short_term.py +84 -0
- voxcli/memory/tokenizer.py +21 -0
- voxcli/plan/__init__.py +5 -0
- voxcli/plan/execution_plan.py +225 -0
- voxcli/plan/planner.py +198 -0
- voxcli/plan/task.py +123 -0
- voxcli/policy/audit_log.py +111 -0
- voxcli/policy/command_guard.py +34 -0
- voxcli/policy/exception.py +5 -0
- voxcli/policy/path_guard.py +32 -0
- voxcli/prompting/__init__.py +7 -0
- voxcli/prompting/presenter.py +154 -0
- voxcli/rag/__init__.py +16 -0
- voxcli/rag/analyzer.py +89 -0
- voxcli/rag/chunk.py +17 -0
- voxcli/rag/chunker.py +137 -0
- voxcli/rag/embedding.py +75 -0
- voxcli/rag/formatter.py +40 -0
- voxcli/rag/index.py +96 -0
- voxcli/rag/relation.py +14 -0
- voxcli/rag/retriever.py +58 -0
- voxcli/rag/store.py +155 -0
- voxcli/rag/tokenizer.py +26 -0
- voxcli/runtime/__init__.py +6 -0
- voxcli/runtime/session_controller.py +386 -0
- voxcli/tool/__init__.py +3 -0
- voxcli/tool/tool_registry.py +433 -0
- voxcli/util/animation.py +219 -0
- voxcli/util/ansi.py +82 -0
- voxcli/util/markdown.py +98 -0
- voxcli/web/__init__.py +17 -0
- voxcli/web/base.py +20 -0
- voxcli/web/extractor.py +77 -0
- voxcli/web/factory.py +38 -0
- voxcli/web/fetch_result.py +27 -0
- voxcli/web/fetcher.py +42 -0
- voxcli/web/network_policy.py +49 -0
- voxcli/web/result.py +23 -0
- voxcli/web/searxng.py +55 -0
- voxcli/web/serpapi.py +53 -0
- voxcli/web/zhipu.py +55 -0
voxcli/util/ansi.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""终端 ANSI 样式辅助 - 支持 Claude Code 风格动画显示"""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _color_enabled() -> bool:
    """Decide whether ANSI styling should be emitted.

    Precedence:
      1. ``VOX_CODE_RENDER_COLOR``, when non-empty, wins: colour is on only
         for ``true`` / ``1`` / ``yes`` (case-insensitive), off otherwise.
      2. A non-empty ``NO_COLOR`` turns colour off.
      3. Otherwise colour is on only when ``TERM`` is not ``dumb`` and
         standard output is a TTY.

    Returns:
        True when escape sequences should be written, False otherwise.
    """
    prop = os.environ.get("VOX_CODE_RENDER_COLOR", "")
    if prop:
        return prop.lower() in ("true", "1", "yes")
    if os.environ.get("NO_COLOR"):
        return False
    if os.environ.get("TERM", "").lower() == "dumb":
        return False

    # sys.stdout may be None (windowed interpreters) or a replacement object
    # without isatty(); this function runs at import time, so it must not raise.
    isatty = getattr(sys.stdout, "isatty", None)
    if isatty is None:
        return False
    try:
        return bool(isatty())
    except ValueError:
        # isatty() raises ValueError on a closed stream.
        return False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Computed once when this module is imported; the helpers below read this
# cached value rather than re-checking the environment on every call.
_ENABLED = _color_enabled()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _wrap(prefix: str, text: str) -> str:
    """Surround *text* with *prefix* and the SGR reset sequence.

    When styling is disabled, or *text* is empty/None, the text is returned
    unstyled (an empty string stands in for a falsy value).
    """
    if _ENABLED and text:
        styled = f"{prefix}{text}\033[0m"
        return styled
    return text if text else ""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def heading(text: str) -> str:
    """Style *text* as a main title: bold cyan (SGR 1 + 36)."""
    bold_cyan = "\033[1m\033[36m"
    return _wrap(bold_cyan, text)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def section(text: str) -> str:
    """Style *text* as a section title: bold green (SGR 1 + 32)."""
    bold_green = "\033[1m\033[32m"
    return _wrap(bold_green, text)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def subtle(text: str) -> str:
    """Style *text* as secondary information: dim gray (SGR 2 + 90)."""
    dim_gray = "\033[2m\033[90m"
    return _wrap(dim_gray, text)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def dim(text: str) -> str:
    """Style *text* as a status indicator: dim/faint (SGR 2)."""
    faint = "\033[2m"
    return _wrap(faint, text)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def code_label(text: str) -> str:
    """Style *text* as a code label: bold yellow (SGR 1 + 33)."""
    bold_yellow = "\033[1m\033[33m"
    return _wrap(bold_yellow, text)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def quote_prefix(text: str) -> str:
    """Style *text* as a quotation: dim cyan (SGR 2 + 36)."""
    dim_cyan = "\033[2m\033[36m"
    return _wrap(dim_cyan, text)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def emphasis(text: str) -> str:
    """Style *text* as emphasized: bold (SGR 1), no colour change."""
    bold = "\033[1m"
    return _wrap(bold, text)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def success(text: str) -> str:
    """Style *text* as a success/completed status: green (SGR 32)."""
    green = "\033[32m"
    return _wrap(green, text)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def error(text: str) -> str:
    """Style *text* as an error status: red (SGR 31)."""
    red = "\033[31m"
    return _wrap(red, text)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def warning(text: str) -> str:
    """Style *text* as a warning: yellow (SGR 33)."""
    yellow = "\033[33m"
    return _wrap(yellow, text)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def info(text: str) -> str:
    """Style *text* as informational: blue (SGR 34)."""
    blue = "\033[34m"
    return _wrap(blue, text)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def is_enabled() -> bool:
    """Report whether ANSI styling was enabled when this module was loaded."""
    enabled = _ENABLED
    return enabled
|
voxcli/util/markdown.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""终端 Markdown 渲染器 - 将 Markdown 文本流式渲染到终端"""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Optional, TextIO
|
|
6
|
+
|
|
7
|
+
from .ansi import heading, section, subtle, quote_prefix, emphasis, code_label
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TerminalMarkdownRenderer:
    """Incrementally render Markdown text to a terminal stream.

    Text is fed in arbitrary chunks through :meth:`append`; every completed
    line is rendered immediately and :meth:`finish` emits whatever remains.
    Rendering is line-oriented: fenced code blocks, block quotes, headings
    (levels 1-3), bullet/numbered lists, horizontal rules and a few inline
    spans (links, inline code, bold, italic) are recognised.
    """

    def __init__(self, stream: Optional[TextIO] = None):
        """Create a renderer writing to *stream* (``sys.stdout`` by default)."""
        self._stream = stream or sys.stdout
        self._buffer = ""
        self._in_code_block = False
        # The two attributes below are initialised here but not consulted by
        # any method of this class.
        self._in_blockquote = False
        self._list_level = 0

    def append(self, text: str):
        """Add *text* to the pending buffer and render any complete lines."""
        self._buffer += text
        self._flush()

    def finish(self):
        """Render the remaining buffered text (if not blank) and clear it."""
        if self._buffer.strip():
            self._render(self._buffer)
        self._buffer = ""

    def _flush(self):
        """Render every newline-terminated line currently in the buffer."""
        while "\n" in self._buffer:
            line, _, self._buffer = self._buffer.partition("\n")
            self._render(line)
        # A long unterminated line is emitted early so streamed output does
        # not stall; note this ends the visual line at that point.
        if len(self._buffer) > 200:
            self._render(self._buffer)
            self._buffer = ""

    def _render(self, line: str):
        """Render a single logical line according to its Markdown prefix."""
        stripped = line.strip()

        # Fence lines toggle code-block mode; only the opening fence prints
        # a label (the info string, or "code" when none is given).
        if stripped.startswith("```"):
            self._in_code_block = not self._in_code_block
            if self._in_code_block:
                self._write(code_label(stripped[3:] or "code"))
            return

        # Inside a fence the line is passed through untouched, after one space.
        if self._in_code_block:
            self._write(f" {line}")
            return

        if stripped.startswith("> "):
            self._write(quote_prefix(stripped))
            return

        if stripped.startswith("# "):
            self._write(heading(stripped[2:]))
            return

        if stripped.startswith("## "):
            self._write(section(stripped[3:]))
            return

        if stripped.startswith("### "):
            # Bold cyan, routed through heading() so the colour switch
            # (NO_COLOR / non-TTY) is honoured like every other style.
            self._write(heading(stripped[4:]))
            return

        if stripped.startswith(("- ", "* ")):
            self._write(f" • {stripped[2:]}")
            return

        if re.match(r"^\d+\. ", stripped):
            self._write(f" {stripped}")
            return

        if stripped.startswith(("---", "***")):
            self._write(subtle("─" * 40))
            return

        if stripped == "":
            self._write("")
            return

        self._write(self._render_inline(stripped))

    @staticmethod
    def _render_inline(text: str) -> str:
        """Apply inline styling (links, code spans, bold, italic) to *text*.

        Substitutions run in that order; bold is handled before italic so
        that ``**x**`` is not consumed by the single-asterisk pattern.
        """
        # Imported locally because the module-level import list does not
        # include is_enabled.
        from .ansi import is_enabled

        color_on = is_enabled()

        def _italic(match) -> str:
            inner = match.group(1)
            # Emit the italic escape only when styling is enabled; otherwise
            # just drop the asterisks, as the other helpers do.
            return f"\033[3m{inner}\033[0m" if color_on else inner

        text = re.sub(
            r"\[([^\]]+)\]\(([^)]+)\)",
            lambda m: f"{emphasis(m.group(1))} ({subtle(m.group(2))})",
            text,
        )
        text = re.sub(r"`([^`]+)`", lambda m: code_label(m.group(1)), text)
        text = re.sub(r"\*\*([^*]+)\*\*", lambda m: emphasis(m.group(1)), text)
        text = re.sub(r"\*([^*]+)\*", _italic, text)
        return text

    def _write(self, text: str):
        """Print *text* followed by a newline to the target stream."""
        print(text, file=self._stream)
|
voxcli/web/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .fetch_result import FetchResult
|
|
2
|
+
from .result import SearchResult
|
|
3
|
+
from .network_policy import NetworkPolicy
|
|
4
|
+
from .base import SearchProvider
|
|
5
|
+
from .factory import SearchProviderFactory
|
|
6
|
+
from .zhipu import ZhipuSearchProvider
|
|
7
|
+
from .serpapi import SerpApiSearchProvider
|
|
8
|
+
from .searxng import SearxngSearchProvider
|
|
9
|
+
from .fetcher import WebFetcher
|
|
10
|
+
from .extractor import HtmlExtractor
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"FetchResult", "SearchResult", "NetworkPolicy",
|
|
14
|
+
"SearchProvider", "SearchProviderFactory",
|
|
15
|
+
"ZhipuSearchProvider", "SerpApiSearchProvider", "SearxngSearchProvider",
|
|
16
|
+
"WebFetcher", "HtmlExtractor",
|
|
17
|
+
]
|
voxcli/web/base.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""搜索提供者接口"""
|
|
2
|
+
|
|
3
|
+
from typing import List, Protocol
|
|
4
|
+
|
|
5
|
+
from .result import SearchResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SearchProvider(Protocol):
    """Structural interface that every web-search backend implements."""

    @property
    def name(self) -> str:
        """Human-readable identifier of the provider."""

    def search(self, query: str, top_k: int) -> List[SearchResult]:
        """Run *query* and return a list of results; *top_k* is the requested count."""

    def is_ready(self) -> bool:
        """Return whether the provider can currently serve requests."""

    def unavailable_hint(self) -> str:
        """Return a message explaining how to make the provider usable."""
|
voxcli/web/extractor.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""HTML 提取器 - 将 HTML 转换为 Markdown"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Optional
|
|
5
|
+
from urllib.parse import urljoin
|
|
6
|
+
|
|
7
|
+
from bs4 import BeautifulSoup, Tag
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HtmlExtractor:
    """Convert an HTML document into a title plus simplified Markdown."""

    def extract(self, html: str, base_url: str) -> Dict[str, str]:
        """Parse *html* and return ``{"title": ..., "markdown": ...}``.

        Args:
            html: Raw HTML text.
            base_url: URL used to resolve relative ``href``/``src`` values.
        """
        soup = BeautifulSoup(html, "lxml")
        # The title is read before noise removal.
        title = self._extract_title(soup)
        self._remove_noise(soup)
        markdown = self._to_markdown(soup, base_url)
        return {"title": title, "markdown": markdown}

    @staticmethod
    def _extract_title(soup: BeautifulSoup) -> str:
        """Return the stripped text of the first ``<title>`` tag, or ``""``."""
        title_tag = soup.find("title")
        return title_tag.get_text(strip=True) if title_tag else ""

    @staticmethod
    def _remove_noise(soup: BeautifulSoup):
        """Delete scripts, styles, navigation and similar elements in place."""
        for selector in ["script", "style", "nav", "footer", "header",
                         ".sidebar", ".menu", ".ad", ".advertisement",
                         ".social-share", ".comments", "[role=complementary]"]:
            for elem in soup.select(selector):
                elem.decompose()

    @staticmethod
    def _to_markdown(soup: BeautifulSoup, base_url: str) -> str:
        """Render the direct children of ``<body>`` (or of *soup*) as Markdown.

        Only tag children are considered; bare text nodes and tags not
        listed below are skipped. Container tags are rendered recursively.
        """
        parts = []
        body = soup.find("body") or soup
        for elem in body.children:
            if not isinstance(elem, Tag):
                continue
            tag = elem.name.lower() if elem.name else ""
            if tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
                level = int(tag[1])
                parts.append(f"\n{'#' * level} {elem.get_text(strip=True)}\n")
            elif tag == "p":
                text = elem.get_text(strip=True)
                if text:
                    parts.append(f"\n{text}\n")
            elif tag in ("ul", "ol"):
                # Ordered lists get real sequence numbers, not "1." each time.
                for index, li in enumerate(elem.find_all("li"), start=1):
                    prefix = "- " if tag == "ul" else f"{index}. "
                    parts.append(f"{prefix}{li.get_text(strip=True)}\n")
                parts.append("\n")
            elif tag == "pre":
                code = elem.get_text()
                parts.append(f"\n```\n{code}\n```\n")
            elif tag == "code":
                parts.append(f"`{elem.get_text(strip=True)}`")
            elif tag == "a":
                href = elem.get("href", "")
                if href and base_url:
                    href = urljoin(base_url, href)
                text = elem.get_text(strip=True)
                if text and href:
                    parts.append(f"[{text}]({href})")
            elif tag == "img":
                src = elem.get("src", "")
                if src and base_url:
                    src = urljoin(base_url, src)
                alt = elem.get("alt", "")
                # Emit a Markdown image; an <img> without a source is skipped.
                if src:
                    parts.append(f"![{alt}]({src})")
            elif tag == "blockquote":
                text = elem.get_text(strip=True)
                if text:
                    parts.append(f"\n> {text}\n")
            elif tag in ("table", "div", "section", "article", "main"):
                parts.append(HtmlExtractor._to_markdown(elem, base_url))
        return "\n".join(parts).strip()
|
voxcli/web/factory.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""搜索提供者工厂 - 根据环境变量创建搜索提供者"""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from ..config import pai_config
|
|
8
|
+
from .base import SearchProvider
|
|
9
|
+
from .zhipu import ZhipuSearchProvider
|
|
10
|
+
from .serpapi import SerpApiSearchProvider
|
|
11
|
+
from .searxng import SearxngSearchProvider
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SearchProviderFactory:
    """Create, and cache for the process lifetime, the configured search provider.

    The provider is chosen by the ``SEARCH_PROVIDER`` environment variable:
    ``zhipu``, ``searxng``, or (default / anything else) ``serpapi``.
    """

    _instance: Optional[SearchProvider] = None

    @classmethod
    def create(cls) -> SearchProvider:
        """Return the cached provider, building it on the first call."""
        if cls._instance is not None:
            return cls._instance

        provider_name = os.environ.get("SEARCH_PROVIDER", "serpapi").strip().lower()

        if provider_name == "zhipu":
            # The LLM provider config is only needed for Zhipu, so it is
            # looked up here rather than unconditionally.
            # NOTE(review): falls back to the "deepseek" entry's api_key when
            # no "glm" entry exists - confirm that is intended.
            config = pai_config.get_provider("glm") or pai_config.get_provider("deepseek") or {}
            cls._instance = ZhipuSearchProvider(api_key=config.get("api_key", ""))
        elif provider_name == "searxng":
            base_url = os.environ.get("SEARXNG_BASE_URL", "http://localhost:8888")
            cls._instance = SearxngSearchProvider(base_url=base_url)
        else:
            if provider_name not in ("", "serpapi"):
                # Keep the lenient fallback, but make the misconfiguration visible.
                logger.warning(
                    "Unknown SEARCH_PROVIDER %r; falling back to serpapi", provider_name
                )
            api_key = os.environ.get("SERPAPI_API_KEY", "")
            cls._instance = SerpApiSearchProvider(api_key=api_key)

        logger.info("Created search provider: %s", cls._instance.name)
        return cls._instance
|
voxcli/web/fetch_result.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""网页抓取结果"""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class FetchResult:
    """Outcome of fetching and extracting a single web page."""

    url: str
    title: str
    markdown: str
    content_length: int
    truncated: bool

    @staticmethod
    def ok(url: str, title: str, markdown: str,
           content_length: int, truncated: bool) -> "FetchResult":
        """Build a result from the given field values."""
        return FetchResult(
            url=url,
            title=title,
            markdown=markdown,
            content_length=content_length,
            truncated=truncated,
        )

    @property
    def body_empty(self) -> bool:
        """True when the Markdown body contains nothing but whitespace."""
        return len(self.markdown.strip()) == 0

    @property
    def hint(self) -> str:
        """Explanatory message for an empty body, or ``""`` otherwise."""
        return "正文为空，可能是 SPA 或防爬墙（已知边界，不重试）" if self.body_empty else ""
|
voxcli/web/fetcher.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""网页抓取器 - 使用 httpx 获取网页内容"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Optional
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
_DEFAULT_TIMEOUT = 30
|
|
11
|
+
_MAX_RESPONSE_BYTES = 5 * 1024 * 1024 # 5MB
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class WebFetcher:
    """Fetch a web page over HTTP(S) with a hard cap on the downloaded size."""

    def __init__(self, timeout: int = _DEFAULT_TIMEOUT):
        """Store the per-request timeout, in seconds."""
        self._timeout = timeout

    def fetch(self, url: str) -> Dict[str, str]:
        """Download *url* and return its final URL, decoded body and content type.

        The body is streamed and reading stops once ``_MAX_RESPONSE_BYTES``
        bytes have been received, so an oversized response is never held in
        memory in full.

        Returns:
            A dict with keys ``"url"`` (after redirects), ``"body"`` and
            ``"content_type"``.

        Raises:
            httpx.HTTPStatusError: for 4xx/5xx responses.
            httpx.HTTPError: for transport-level failures.
        """
        logger.info("Fetching URL: %s", url)
        chunks = []
        received = 0
        truncated = False
        with httpx.stream(
            "GET",
            url,
            timeout=self._timeout,
            follow_redirects=True,
            headers={
                "User-Agent": ("Mozilla/5.0 (compatible; VoxCode/1.0; "
                               "+https://github.com/vox-code)"),
                "Accept": "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            },
        ) as response:
            response.raise_for_status()
            for chunk in response.iter_bytes():
                remaining = _MAX_RESPONSE_BYTES - received
                if len(chunk) > remaining:
                    # Keep only what fits under the cap and stop reading.
                    chunks.append(chunk[:remaining])
                    truncated = True
                    break
                chunks.append(chunk)
                received += len(chunk)
            encoding = response.encoding or "utf-8"
            final_url = str(response.url)
            content_type = response.headers.get("content-type", "")

        if truncated:
            logger.warning("Response truncated to %d bytes", _MAX_RESPONSE_BYTES)

        raw = b"".join(chunks)
        try:
            # errors="replace": a byte-level cut may split a multi-byte character.
            body = raw.decode(encoding, errors="replace")
        except LookupError:
            # Unknown charset label declared by the server.
            body = raw.decode("utf-8", errors="replace")

        return {
            "url": final_url,
            "body": body,
            "content_type": content_type,
        }
|
voxcli/web/network_policy.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""网络策略 - SSRF 防护和速率限制"""
|
|
2
|
+
|
|
3
|
+
import ipaddress
|
|
4
|
+
import time
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class NetworkPolicy:
    """URL safety checks (scheme + SSRF guard) and a sliding-window rate limit.

    Both ``check_url`` and ``acquire`` return ``None`` on success and a
    human-readable refusal message otherwise.
    """

    rate_limit_per_minute: int = 10
    allowed_schemes: tuple = ("http", "https")
    block_private_ip: bool = True

    def __init__(self, rate_per_minute: int = 10):
        """Create a policy allowing *rate_per_minute* calls per 60 seconds."""
        self.rate_limit_per_minute = rate_per_minute
        self._call_timestamps: list = []

    def check_url(self, url: str) -> Optional[str]:
        """Validate *url*; return ``None`` if allowed, else a reason string.

        Note: when the host is a DNS name and ``block_private_ip`` is set,
        this performs a blocking name lookup.
        """
        if not url:
            return "URL 为空"
        try:
            parsed = urlparse(url)
            hostname = parsed.hostname
        except ValueError:
            # e.g. malformed bracketed IPv6 literal
            return f"URL 格式无效: {url}"
        # Honour the configurable field rather than a hard-coded tuple.
        if parsed.scheme not in self.allowed_schemes:
            return f"不支持的协议: {parsed.scheme}"
        return self._check_ip(hostname) if self.block_private_ip else None

    def _check_ip(self, hostname: Optional[str]) -> Optional[str]:
        """Reject hosts that are, or resolve to, local/internal addresses."""
        if not hostname:
            return "无法解析 hostname"
        # Normalise a fully-qualified trailing dot ("localhost.").
        host = hostname.rstrip(".").lower()
        if not host:
            return "无法解析 hostname"
        if host in ("localhost", "127.0.0.1", "::1") or host.endswith(".localhost"):
            return f"禁止访问本地地址: {hostname}"
        try:
            candidates = [ipaddress.ip_address(host)]
        except ValueError:
            # Not an IP literal: check what the name resolves to, so names
            # (or integer-form hosts) pointing at internal addresses are
            # caught too. This cannot stop DNS rebinding between this check
            # and the actual request.
            candidates = self._resolve(host)
        if any(self._is_internal(addr) for addr in candidates):
            return f"禁止访问内网地址: {hostname}"
        return None

    @staticmethod
    def _resolve(host: str) -> list:
        """Return the addresses *host* resolves to; empty if lookup fails."""
        import socket

        try:
            infos = socket.getaddrinfo(host, None)
        except (OSError, UnicodeError):
            # Unresolvable names are left for the HTTP client to reject.
            return []
        resolved = []
        for info in infos:
            raw = info[4][0].split("%", 1)[0]  # drop any IPv6 zone id
            try:
                resolved.append(ipaddress.ip_address(raw))
            except ValueError:
                continue
        return resolved

    @staticmethod
    def _is_internal(addr) -> bool:
        """True for private, loopback, link-local and other non-public ranges."""
        mapped = getattr(addr, "ipv4_mapped", None)
        if mapped is not None:
            # Judge ::ffff:a.b.c.d by the embedded IPv4 address.
            addr = mapped
        return bool(
            addr.is_private
            or addr.is_loopback
            or addr.is_link_local
            or addr.is_reserved
            or addr.is_multicast
            or addr.is_unspecified
        )

    def acquire(self) -> Optional[str]:
        """Record one call; return ``None`` if within the limit, else a message."""
        # Monotonic clock: the window must not be affected by wall-clock jumps.
        now = time.monotonic()
        window = 60.0
        self._call_timestamps = [t for t in self._call_timestamps if now - t < window]
        if len(self._call_timestamps) >= self.rate_limit_per_minute:
            return f"请求频率超过限制（{self.rate_limit_per_minute}/分钟）"
        self._call_timestamps.append(now)
        return None
|
voxcli/web/result.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""搜索结果"""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class SearchResult:
    """One hit returned by a search provider.

    ``source`` is a plain field. It must not be shadowed by a same-named
    property: the property object would then become the dataclass default,
    so an omitted ``source`` would yield a ``property`` instead of ``""``.
    """

    title: str
    url: str
    snippet: str
    position: int  # rank of the hit as assigned by the provider code
    source: str = ""  # provider identifier; empty when not supplied
|
voxcli/web/searxng.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""SearXNG 搜索提供者(自托管)"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from .result import SearchResult
|
|
10
|
+
from .base import SearchProvider
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SearxngSearchProvider:
    """Search provider backed by a self-hosted SearXNG instance."""

    def __init__(self, base_url: str = "http://localhost:8888"):
        """Remember the instance base URL, without trailing slashes."""
        self._base_url = base_url.rstrip("/")

    @property
    def name(self) -> str:
        """Display name of this provider."""
        return "SearXNG"

    def search(self, query: str, top_k: int = 5) -> List[SearchResult]:
        """Query the instance's JSON search API and return up to *top_k* hits.

        Raises:
            httpx.HTTPError: on transport failures or non-2xx responses.
        """
        response = httpx.get(
            f"{self._base_url}/search",
            params={"q": query, "format": "json", "language": "zh-CN", "categories": "general"},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        return self._parse_results(data, top_k)

    def is_ready(self) -> bool:
        """Return True when the instance answers a health probe.

        SearXNG exposes ``/healthz``; ``/health`` is still tried afterwards
        for deployments that map that path.
        """
        for path in ("/healthz", "/health"):
            try:
                httpx.get(f"{self._base_url}{path}", timeout=5).raise_for_status()
            except Exception as exc:
                # Best-effort probe: any failure just means "not ready here".
                logger.debug("SearXNG probe %s failed: %s", path, exc)
                continue
            return True
        return False

    def unavailable_hint(self) -> str:
        """Message shown when the instance cannot be reached."""
        return f"SearXNG 无法连接（{self._base_url}），请确保服务已启动"

    @staticmethod
    def _parse_results(data: dict, top_k: int) -> List[SearchResult]:
        """Convert the ``results`` array of a SearXNG reply into SearchResult objects."""
        items = data.get("results") or []
        # Clamp at zero: a negative top_k must not slice from the end.
        limit = max(top_k, 0)
        return [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                snippet=item.get("content", ""),
                position=rank,
                source="searxng",
            )
            for rank, item in enumerate(items[:limit], start=1)
        ]
|
voxcli/web/serpapi.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""SerpAPI 搜索提供者"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from .result import SearchResult
|
|
10
|
+
from .base import SearchProvider
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SerpApiSearchProvider:
    """Search provider backed by the SerpAPI Google engine."""

    def __init__(self, api_key: str = ""):
        """Store the SerpAPI key; an empty key leaves the provider not ready."""
        self._api_key = api_key

    @property
    def name(self) -> str:
        """Display name of this provider."""
        return "SerpAPI"

    def search(self, query: str, top_k: int = 5) -> List[SearchResult]:
        """Run *query* and return at most ``min(top_k, 10)`` organic results.

        Returns an empty list when no API key is configured or *top_k* is
        not positive.

        Raises:
            httpx.HTTPError: on transport failures or non-2xx responses.
        """
        if not self._api_key or top_k <= 0:
            return []
        response = httpx.get(
            "https://serpapi.com/search",
            params={"q": query, "api_key": self._api_key, "engine": "google", "num": min(top_k, 10)},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        # Enforce top_k locally as well, so the reply size cannot exceed it.
        return self._parse_results(data, top_k)

    def is_ready(self) -> bool:
        """True when an API key has been configured."""
        return bool(self._api_key)

    def unavailable_hint(self) -> str:
        """Message explaining which setting is missing."""
        return "SerpAPI 搜索需要配置 SERPAPI_API_KEY 环境变量"

    @staticmethod
    def _parse_results(data: dict, top_k: int = 10) -> List[SearchResult]:
        """Convert ``organic_results`` into SearchResult objects.

        Args:
            data: Decoded SerpAPI JSON reply.
            top_k: Maximum number of results to keep (capped at 10, the
                previous fixed limit, which remains the default).
        """
        limit = max(0, min(top_k, 10))
        organic = data.get("organic_results") or []
        return [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("link", ""),
                snippet=item.get("snippet", ""),
                position=rank,
                source="serpapi",
            )
            for rank, item in enumerate(organic[:limit], start=1)
        ]
|
voxcli/web/zhipu.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""智谱搜索提供者 - 使用智谱 API 进行搜索"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from .result import SearchResult
|
|
10
|
+
from .base import SearchProvider
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ZhipuSearchProvider:
    """Search provider that calls the Zhipu (BigModel) web-search endpoint."""

    def __init__(self, api_key: str = "", base_url: str = "https://open.bigmodel.cn/api/paas/v4"):
        """Store credentials and the API base URL (trailing slashes removed)."""
        self._api_key = api_key
        # Normalised so the joined request path never contains "//".
        self._base_url = base_url.rstrip("/")

    @property
    def name(self) -> str:
        """Identifier of this provider."""
        return "zhipu"

    def search(self, query: str, top_k: int = 5) -> List[SearchResult]:
        """Run *query* and return at most ``min(top_k, 10)`` results.

        Returns an empty list when no API key is configured or *top_k* is
        not positive.

        Raises:
            httpx.HTTPError: on transport failures or non-2xx responses.
        """
        if not self._api_key or top_k <= 0:
            return []
        limit = min(top_k, 10)
        # NOTE(review): endpoint path and payload keys are kept as written;
        # verify them against the current Zhipu web-search API reference.
        response = httpx.post(
            f"{self._base_url}/tools/web_search",
            headers={"Authorization": f"Bearer {self._api_key}", "Content-Type": "application/json"},
            json={"query": query, "top_k": limit},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        # Cap locally too, in case the service returns more than requested.
        return self._parse_results(data)[:limit]

    def is_ready(self) -> bool:
        """True when an API key has been configured."""
        return bool(self._api_key)

    def unavailable_hint(self) -> str:
        """Message explaining which setting is missing."""
        return "智谱搜索需要配置 GLM_API_KEY 环境变量"

    @staticmethod
    def _parse_results(data: dict) -> List[SearchResult]:
        """Convert a reply into SearchResult objects.

        Items are read from ``results`` or, failing that, ``data``; each
        item's URL and snippet are taken from the first non-empty of
        ``url``/``link`` and ``snippet``/``content`` respectively.
        """
        items = data.get("results", []) or data.get("data", []) or []
        return [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", "") or item.get("link", ""),
                snippet=item.get("snippet", "") or item.get("content", ""),
                position=rank,
                source="zhipu",
            )
            for rank, item in enumerate(items, start=1)
        ]
|