winwin-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- winwin_cli-0.1.0/LICENSE +21 -0
- winwin_cli-0.1.0/PKG-INFO +140 -0
- winwin_cli-0.1.0/README.md +108 -0
- winwin_cli-0.1.0/pyproject.toml +55 -0
- winwin_cli-0.1.0/setup.cfg +4 -0
- winwin_cli-0.1.0/src/winwin_cli/__init__.py +3 -0
- winwin_cli-0.1.0/src/winwin_cli/cli.py +45 -0
- winwin_cli-0.1.0/src/winwin_cli/convert.py +205 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/__init__.py +37 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/cli.py +42 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/__init__.py +72 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/add.py +80 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/disable.py +41 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/enable.py +41 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/index.py +57 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/info.py +34 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/list.py +58 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/remove.py +39 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/search.py +86 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/commands/status.py +109 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/config.py +146 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/indexer.py +531 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/markitdown.py +99 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/models.py +87 -0
- winwin_cli-0.1.0/src/winwin_cli/kb_search/search.py +217 -0
- winwin_cli-0.1.0/src/winwin_cli.egg-info/PKG-INFO +140 -0
- winwin_cli-0.1.0/src/winwin_cli.egg-info/SOURCES.txt +34 -0
- winwin_cli-0.1.0/src/winwin_cli.egg-info/dependency_links.txt +1 -0
- winwin_cli-0.1.0/src/winwin_cli.egg-info/entry_points.txt +2 -0
- winwin_cli-0.1.0/src/winwin_cli.egg-info/requires.txt +8 -0
- winwin_cli-0.1.0/src/winwin_cli.egg-info/top_level.txt +1 -0
- winwin_cli-0.1.0/tests/test_convert.py +189 -0
- winwin_cli-0.1.0/tests/test_kb_search_cli.py +239 -0
- winwin_cli-0.1.0/tests/test_kb_search_config.py +203 -0
- winwin_cli-0.1.0/tests/test_kb_search_indexer.py +253 -0
- winwin_cli-0.1.0/tests/test_kb_search_search.py +233 -0
winwin_cli-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 xuruikun
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: winwin-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI 封装工具,专为 AI 使用设计
|
|
5
|
+
Author: xuruikun
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/yourusername/winwin-cli
|
|
8
|
+
Project-URL: Repository, https://github.com/yourusername/winwin-cli
|
|
9
|
+
Project-URL: Issues, https://github.com/yourusername/winwin-cli/issues
|
|
10
|
+
Keywords: cli,ai,knowledge-base,search,document-conversion
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: click>=8.1.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
25
|
+
Requires-Dist: orjson>=3.9.0
|
|
26
|
+
Requires-Dist: jieba>=0.42.0
|
|
27
|
+
Requires-Dist: rank-bm25>=0.2.0
|
|
28
|
+
Requires-Dist: pydantic>=2.0.0
|
|
29
|
+
Requires-Dist: tqdm>=4.65.0
|
|
30
|
+
Requires-Dist: markitdown>=0.1.4
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# winwin-cli
|
|
34
|
+
|
|
35
|
+
CLI 封装工具,专为 AI 使用设计。
|
|
36
|
+
|
|
37
|
+
## 功能
|
|
38
|
+
|
|
39
|
+
### kb-search - 知识库检索工具
|
|
40
|
+
|
|
41
|
+
快速搜索你的文档,支持基于 BM25 的全文检索。
|
|
42
|
+
|
|
43
|
+
**特性:**
|
|
44
|
+
- 支持多种文档格式(Markdown、HTML、PDF、Office 文档等)
|
|
45
|
+
- 基于 BM25 算法的全文检索
|
|
46
|
+
- 自动文档索引和更新
|
|
47
|
+
- 中文分词支持(jieba)
|
|
48
|
+
- 多知识库管理
|
|
49
|
+
- JSON 输出格式,便于 AI 解析
|
|
50
|
+
|
|
51
|
+
**安装:**
|
|
52
|
+
```bash
|
|
53
|
+
# 使用 uvx 直接运行(无需安装)
|
|
54
|
+
uvx winwin-cli kb-search --help
|
|
55
|
+
|
|
56
|
+
# 或使用 uv pip 安装
|
|
57
|
+
uv pip install winwin-cli
|
|
58
|
+
winwin-cli kb-search --help
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**快速开始:**
|
|
62
|
+
```bash
|
|
63
|
+
# 添加文档到知识库
|
|
64
|
+
winwin-cli kb-search add my-kb ./docs
|
|
65
|
+
|
|
66
|
+
# 搜索文档
|
|
67
|
+
winwin-cli kb-search search my-kb "查询关键词"
|
|
68
|
+
|
|
69
|
+
# 列出所有知识库
|
|
70
|
+
winwin-cli kb-search list
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### convert - 文档转换工具
|
|
74
|
+
|
|
75
|
+
将各种格式的文档转换为 Markdown 或纯文本。
|
|
76
|
+
|
|
77
|
+
**支持格式:**
|
|
78
|
+
- Markdown (.md, .markdown)
|
|
79
|
+
- HTML (.html, .htm)
|
|
80
|
+
- PDF (.pdf)
|
|
81
|
+
- Word 文档 (.docx, .doc)
|
|
82
|
+
- PowerPoint (.pptx, .ppt)
|
|
83
|
+
- Excel (.xlsx, .xls)
|
|
84
|
+
- 纯文本 (.txt)
|
|
85
|
+
|
|
86
|
+
**使用方法:**
|
|
87
|
+
```bash
|
|
88
|
+
# 转换单个文件
|
|
89
|
+
winwin-cli convert document.docx -o output.md
|
|
90
|
+
|
|
91
|
+
# 转换为纯文本
|
|
92
|
+
winwin-cli convert document.pdf --format text
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## 开发
|
|
96
|
+
|
|
97
|
+
**环境设置:**
|
|
98
|
+
```bash
|
|
99
|
+
# 安装 uv
|
|
100
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
101
|
+
|
|
102
|
+
# 安装依赖
|
|
103
|
+
uv sync
|
|
104
|
+
|
|
105
|
+
# 激活虚拟环境
|
|
106
|
+
source .venv/bin/activate
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**运行测试:**
|
|
110
|
+
```bash
|
|
111
|
+
uv run pytest
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**构建:**
|
|
115
|
+
```bash
|
|
116
|
+
uv build
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## 项目结构
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
winwin-cli/
|
|
123
|
+
├── src/winwin_cli/ # 源代码
|
|
124
|
+
│ ├── cli.py # 主入口
|
|
125
|
+
│ ├── convert.py # 文档转换模块
|
|
126
|
+
│ └── kb_search/ # 知识库检索模块
|
|
127
|
+
│ ├── cli.py # kb-search 命令行
|
|
128
|
+
│ ├── config.py # 配置管理
|
|
129
|
+
│ ├── indexer.py # 文档索引
|
|
130
|
+
│ ├── search.py # 搜索引擎
|
|
131
|
+
│ ├── models.py # 数据模型
|
|
132
|
+
│ └── commands/ # 子命令
|
|
133
|
+
├── tests/ # 测试文件
|
|
134
|
+
├── docs/ # 文档
|
|
135
|
+
└── pyproject.toml # 项目配置
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## 许可证
|
|
139
|
+
|
|
140
|
+
MIT License
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# winwin-cli
|
|
2
|
+
|
|
3
|
+
CLI 封装工具,专为 AI 使用设计。
|
|
4
|
+
|
|
5
|
+
## 功能
|
|
6
|
+
|
|
7
|
+
### kb-search - 知识库检索工具
|
|
8
|
+
|
|
9
|
+
快速搜索你的文档,支持基于 BM25 的全文检索。
|
|
10
|
+
|
|
11
|
+
**特性:**
|
|
12
|
+
- 支持多种文档格式(Markdown、HTML、PDF、Office 文档等)
|
|
13
|
+
- 基于 BM25 算法的全文检索
|
|
14
|
+
- 自动文档索引和更新
|
|
15
|
+
- 中文分词支持(jieba)
|
|
16
|
+
- 多知识库管理
|
|
17
|
+
- JSON 输出格式,便于 AI 解析
|
|
18
|
+
|
|
19
|
+
**安装:**
|
|
20
|
+
```bash
|
|
21
|
+
# 使用 uvx 直接运行(无需安装)
|
|
22
|
+
uvx winwin-cli kb-search --help
|
|
23
|
+
|
|
24
|
+
# 或使用 uv pip 安装
|
|
25
|
+
uv pip install winwin-cli
|
|
26
|
+
winwin-cli kb-search --help
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**快速开始:**
|
|
30
|
+
```bash
|
|
31
|
+
# 添加文档到知识库
|
|
32
|
+
winwin-cli kb-search add my-kb ./docs
|
|
33
|
+
|
|
34
|
+
# 搜索文档
|
|
35
|
+
winwin-cli kb-search search my-kb "查询关键词"
|
|
36
|
+
|
|
37
|
+
# 列出所有知识库
|
|
38
|
+
winwin-cli kb-search list
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### convert - 文档转换工具
|
|
42
|
+
|
|
43
|
+
将各种格式的文档转换为 Markdown 或纯文本。
|
|
44
|
+
|
|
45
|
+
**支持格式:**
|
|
46
|
+
- Markdown (.md, .markdown)
|
|
47
|
+
- HTML (.html, .htm)
|
|
48
|
+
- PDF (.pdf)
|
|
49
|
+
- Word 文档 (.docx, .doc)
|
|
50
|
+
- PowerPoint (.pptx, .ppt)
|
|
51
|
+
- Excel (.xlsx, .xls)
|
|
52
|
+
- 纯文本 (.txt)
|
|
53
|
+
|
|
54
|
+
**使用方法:**
|
|
55
|
+
```bash
|
|
56
|
+
# 转换单个文件
|
|
57
|
+
winwin-cli convert document.docx -o output.md
|
|
58
|
+
|
|
59
|
+
# 转换为纯文本
|
|
60
|
+
winwin-cli convert document.pdf --format text
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## 开发
|
|
64
|
+
|
|
65
|
+
**环境设置:**
|
|
66
|
+
```bash
|
|
67
|
+
# 安装 uv
|
|
68
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
69
|
+
|
|
70
|
+
# 安装依赖
|
|
71
|
+
uv sync
|
|
72
|
+
|
|
73
|
+
# 激活虚拟环境
|
|
74
|
+
source .venv/bin/activate
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**运行测试:**
|
|
78
|
+
```bash
|
|
79
|
+
uv run pytest
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**构建:**
|
|
83
|
+
```bash
|
|
84
|
+
uv build
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## 项目结构
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
winwin-cli/
|
|
91
|
+
├── src/winwin_cli/ # 源代码
|
|
92
|
+
│ ├── cli.py # 主入口
|
|
93
|
+
│ ├── convert.py # 文档转换模块
|
|
94
|
+
│ └── kb_search/ # 知识库检索模块
|
|
95
|
+
│ ├── cli.py # kb-search 命令行
|
|
96
|
+
│ ├── config.py # 配置管理
|
|
97
|
+
│ ├── indexer.py # 文档索引
|
|
98
|
+
│ ├── search.py # 搜索引擎
|
|
99
|
+
│ ├── models.py # 数据模型
|
|
100
|
+
│ └── commands/ # 子命令
|
|
101
|
+
├── tests/ # 测试文件
|
|
102
|
+
├── docs/ # 文档
|
|
103
|
+
└── pyproject.toml # 项目配置
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 许可证
|
|
107
|
+
|
|
108
|
+
MIT License
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "winwin-cli"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "CLI 封装工具,专为 AI 使用设计"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = {text = "MIT"}
|
|
8
|
+
authors = [
|
|
9
|
+
{name = "xuruikun"}
|
|
10
|
+
]
|
|
11
|
+
keywords = ["cli", "ai", "knowledge-base", "search", "document-conversion"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 3 - Alpha",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
21
|
+
"Topic :: Utilities",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"click>=8.1.0",
|
|
25
|
+
"pyyaml>=6.0.0",
|
|
26
|
+
"orjson>=3.9.0",
|
|
27
|
+
"jieba>=0.42.0",
|
|
28
|
+
"rank-bm25>=0.2.0",
|
|
29
|
+
"pydantic>=2.0.0",
|
|
30
|
+
"tqdm>=4.65.0",
|
|
31
|
+
"markitdown>=0.1.4",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/yourusername/winwin-cli"
|
|
36
|
+
Repository = "https://github.com/yourusername/winwin-cli"
|
|
37
|
+
Issues = "https://github.com/yourusername/winwin-cli/issues"
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
winwin-cli = "winwin_cli.cli:main"
|
|
41
|
+
|
|
42
|
+
[dependency-groups]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=7.4.0",
|
|
45
|
+
"pytest-cov>=4.1.0",
|
|
46
|
+
"pytest-mock>=3.12.0",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[tool.uv]
|
|
50
|
+
package = true
|
|
51
|
+
|
|
52
|
+
[tool.pytest.ini_options]
|
|
53
|
+
testpaths = ["tests"]
|
|
54
|
+
python_files = ["test_*.py"]
|
|
55
|
+
addopts = "-v --tb=short"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""winwin-cli 主入口"""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Optional
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Root command group of the CLI. Subcommands are attached to it further down
# in this module. The docstring below is rendered by click as the --help text,
# so it is runtime output and is kept exactly as written.
@click.group()
@click.version_option(version="0.1.0")
def main() -> None:
    """winwin-cli - CLI 封装工具,专为 AI 使用设计

    提供各种命令行工具的封装,支持 AI 自动化调用。
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _import_kb_search():
    """Import the kb-search command group on demand and return it.

    The import statement sits inside the function body, so it runs when the
    function is called rather than when this module is first parsed. Any
    ImportError raised by the import propagates to the caller.
    """
    from winwin_cli.kb_search.cli import kb_search as kb_search_group

    return kb_search_group
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _import_convert():
    """Import the convert command on demand and return it.

    The import statement sits inside the function body, so it runs when the
    function is called rather than when this module is first parsed. Any
    ImportError raised by the import propagates to the caller.
    """
    from winwin_cli.convert import convert as convert_command

    return convert_command
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _unavailable_command(name: str, error: ImportError) -> click.Command:
    """Return a stand-in for subcommand *name* that reports *error* when run.

    Used when a subcommand's module cannot be imported. Registering the
    stand-in keeps the subcommand visible in ``--help`` and turns an opaque
    "No such command" into a message that names the failed import.

    Args:
        name: Command name to register the stand-in under.
        error: The ImportError raised while loading the real command.

    Returns:
        A click command that always fails with a ClickException.
    """
    # Keep only the message so the closure does not hold on to the traceback.
    reason = str(error)

    @click.command(
        name,
        # Accept whatever arguments/options the real command would have taken
        # so that the import failure, not a usage error, is what gets reported.
        context_settings={"ignore_unknown_options": True, "allow_extra_args": True},
        add_help_option=False,
        help="(不可用:依赖导入失败)",
    )
    def _stub() -> None:
        raise click.ClickException(f"子命令 '{name}' 不可用(导入失败): {reason}")

    return _stub


# Register subcommands. An import failure no longer makes a subcommand vanish
# silently: a stand-in is registered that explains the failure when invoked.
try:
    kb_search = _import_kb_search()
except ImportError as exc:
    kb_search = _unavailable_command("kb-search", exc)
main.add_command(kb_search, "kb-search")

try:
    convert_cmd = _import_convert()
except ImportError as exc:
    convert_cmd = _unavailable_command("convert", exc)
main.add_command(convert_cmd)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""文档转换模块 - 将各种格式转换为 Markdown"""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional, List
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from winwin_cli.kb_search.markitdown import run_markitdown
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# `convert` subcommand: converts a single file, or every supported file under
# a directory, to Markdown. The option help strings and the docstring below
# are rendered by click as the --help output, so they are runtime text and are
# kept exactly as written (indentation of the usage examples is reconstructed).
@click.command()
@click.argument(
    "input_path",
    type=click.Path(exists=True),
    required=True,
)
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    help="输出目录或文件路径(默认:与输入文件同目录)",
)
@click.option(
    "--ext",
    "-e",
    multiple=True,
    help="文件扩展名过滤(可多次使用,如:--ext .docx --ext .pdf)",
)
@click.option(
    "--overwrite",
    "-f",
    is_flag=True,
    help="覆盖已存在的 Markdown 文件",
)
def convert(input_path: str, output: Optional[str], ext: tuple, overwrite: bool):
    """转换文档为 Markdown 格式

    支持的输入格式:
    - Office: .docx, .doc, .pptx, .xlsx, .xls
    - PDF: .pdf
    - 图片(OCR): .jpg, .jpeg, .png, .gif, .bmp, .webp
    - 音频(语音转录): .wav, .mp3, .m4a
    - 视频: .mp4, .avi, .mov, .mkv
    - 文本: .html, .htm, .csv, .json, .xml

    用例:
        # 转换单个文件
        winwin-cli convert document.docx

        # 转换目录中的所有文件
        winwin-cli convert ./docs

        # 转换并指定输出目录
        winwin-cli convert ./docs -o ./markdown

        # 只转换特定格式
        winwin-cli convert ./docs --ext .pdf --ext .docx

        # 覆盖已存在的 Markdown 文件
        winwin-cli convert ./docs --overwrite
    """
    # tqdm is deliberately not imported here: only the directory path uses a
    # progress bar, and _convert_directory imports it itself.
    input_path_obj = Path(input_path)

    # click has already verified that the path exists, so anything that is not
    # a regular file is handled as a directory. --ext applies only to the
    # directory case.
    if input_path_obj.is_file():
        _convert_single_file(input_path_obj, output, overwrite)
    else:
        _convert_directory(input_path_obj, output, ext, overwrite)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _convert_single_file(
    input_file: Path,
    output_path: Optional[str],
    overwrite: bool,
):
    """Convert one file to Markdown and report the outcome on the console.

    Args:
        input_file: Path of the file to convert.
        output_path: Optional destination. An existing directory receives
            ``<input stem>.md``; any other value is used as the output file
            path exactly as given. When omitted, the output is written next to
            the input with its suffix replaced by ``.md``.
        overwrite: Replace the output file if it already exists. When false,
            an existing output is left untouched and the conversion is skipped.

    Raises:
        SystemExit: With exit status 1 when the conversion fails.
    """
    # Work out where the Markdown goes.
    if not output_path:
        output_file = input_file.with_suffix(".md")
    else:
        target = Path(output_path)
        # Only an *existing* directory is treated as a directory target.
        output_file = target / f"{input_file.stem}.md" if target.is_dir() else target

    # Never clobber an existing file unless explicitly asked to.
    if output_file.exists() and not overwrite:
        click.echo(f"⚠ 跳过(已存在): {output_file}")
        return

    try:
        click.echo(f"正在转换: {input_file.name}")
        # Create missing parent directories, as the directory conversion path
        # does. Done inside the try so a failure is reported like any other
        # conversion error.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        run_markitdown(str(input_file), str(output_file), "md")
        click.echo(f"✓ 转换成功: {output_file}")
    except Exception as e:
        # Top-level CLI boundary: report the problem and exit non-zero.
        click.echo(f"✗ 转换失败: {input_file.name} - {e}", err=True)
        sys.exit(1)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _convert_directory(
    input_dir: Path,
    output_path: Optional[str],
    extensions: tuple,
    overwrite: bool,
):
    """Convert every supported file under a directory tree to Markdown.

    The directory is searched recursively. Each output file keeps the input's
    path relative to ``input_dir`` with the suffix replaced by ``.md``. A
    summary is printed at the end.

    Args:
        input_dir: Root directory to search.
        output_path: Optional output root, created if missing. When omitted,
            outputs are written alongside the inputs.
        extensions: Optional extension filter. Entries are matched
            case-insensitively and may be given with or without the leading
            dot. An empty tuple selects all supported extensions.
        overwrite: Replace output files that already exist. When false,
            existing outputs are counted as skipped.

    Raises:
        SystemExit: With exit status 1 when at least one file failed.
    """
    from tqdm import tqdm

    # Extensions this command attempts to convert.
    supported_extensions = {
        # Office documents
        ".docx", ".doc", ".pptx", ".xlsx", ".xls",
        # PDF
        ".pdf",
        # Images
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp",
        # Audio
        ".wav", ".mp3", ".m4a",
        # Video
        ".mp4", ".avi", ".mov", ".mkv",
        # Text formats
        ".html", ".htm", ".csv", ".json", ".xml",
    }

    # Normalise the filter so "pdf", ".pdf" and ".PDF" all select PDF files.
    # A non-empty filter that matches nothing still selects nothing.
    if extensions:
        requested = {
            (item if item.startswith(".") else f".{item}").lower()
            for item in (raw.strip() for raw in extensions)
            if item
        }
        selected_extensions = supported_extensions & requested
    else:
        selected_extensions = supported_extensions

    # Resolve the output root.
    if output_path:
        output_dir = Path(output_path)
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        output_dir = input_dir

    # One walk over the tree instead of one per extension. The suffix is
    # compared in lower case so upper-case suffixes are found on
    # case-sensitive filesystems; sorting makes the order deterministic.
    files_to_convert = sorted(
        candidate
        for candidate in input_dir.rglob("*")
        if candidate.suffix.lower() in selected_extensions and candidate.is_file()
    )

    if not files_to_convert:
        click.echo("未找到可转换的文件")
        return

    click.echo(f"\n找到 {len(files_to_convert)} 个文件")

    success_count = 0
    skip_count = 0
    error_count = 0

    # NOTE(review): inputs that differ only by suffix (e.g. report.docx and
    # report.pdf) map to the same report.md; the later one is skipped or
    # overwrites the earlier one depending on `overwrite`.
    for input_file in tqdm(files_to_convert, desc=" 转换进度", unit="文件"):
        try:
            relative_path = input_file.relative_to(input_dir)
        except ValueError:
            # Not under input_dir: fall back to the bare file name, kept as a
            # Path so that with_suffix() below works.
            relative_path = Path(input_file.name)

        output_file = output_dir / relative_path.with_suffix(".md")

        if output_file.exists() and not overwrite:
            skip_count += 1
            continue

        try:
            # Directory creation is part of the per-file attempt so that one
            # failure does not abort the whole batch.
            output_file.parent.mkdir(parents=True, exist_ok=True)
            run_markitdown(str(input_file), str(output_file), "md")
        except Exception as exc:
            error_count += 1
            # tqdm.write prints without corrupting the progress bar.
            tqdm.write(f"✗ 转换失败: {input_file} - {exc}", file=sys.stderr)
        else:
            success_count += 1

    # Summary
    click.echo("\n转换完成:")
    click.echo(f" ✓ 成功: {success_count} 个文件")
    if skip_count > 0:
        click.echo(f" ⊘ 跳过: {skip_count} 个文件(已存在)")
    if error_count > 0:
        click.echo(f" ✗ 失败: {error_count} 个文件")
        sys.exit(1)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""知识库检索工具模块"""
|
|
2
|
+
|
|
3
|
+
from winwin_cli.kb_search.cli import kb_search, search, list_kb, index, add, remove, enable, disable, status, info
|
|
4
|
+
from winwin_cli.kb_search.search import KnowledgeBaseSearcher, SearchEngine
|
|
5
|
+
from winwin_cli.kb_search.config import KnowledgeBaseLoader, load_global_config
|
|
6
|
+
from winwin_cli.kb_search.indexer import KnowledgeBaseIndexer, BM25Indexer
|
|
7
|
+
from winwin_cli.kb_search.models import (
|
|
8
|
+
KnowledgeBaseConfig,
|
|
9
|
+
SearchResult,
|
|
10
|
+
SearchRequest,
|
|
11
|
+
Document,
|
|
12
|
+
IndexInfo,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"kb_search",
|
|
17
|
+
"search",
|
|
18
|
+
"list_kb",
|
|
19
|
+
"index",
|
|
20
|
+
"add",
|
|
21
|
+
"remove",
|
|
22
|
+
"enable",
|
|
23
|
+
"disable",
|
|
24
|
+
"status",
|
|
25
|
+
"info",
|
|
26
|
+
"KnowledgeBaseSearcher",
|
|
27
|
+
"SearchEngine",
|
|
28
|
+
"KnowledgeBaseLoader",
|
|
29
|
+
"load_global_config",
|
|
30
|
+
"KnowledgeBaseIndexer",
|
|
31
|
+
"BM25Indexer",
|
|
32
|
+
"KnowledgeBaseConfig",
|
|
33
|
+
"SearchResult",
|
|
34
|
+
"SearchRequest",
|
|
35
|
+
"Document",
|
|
36
|
+
"IndexInfo",
|
|
37
|
+
]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""知识库检索工具 - CLI 模块(简化版)"""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from winwin_cli.kb_search.commands import discover_commands
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Command group for the knowledge-base search tool. Its docstring is rendered
# by click as the group's --help text, so it is kept exactly as written.
@click.group()
def kb_search() -> None:
    """知识库检索工具 - 快速搜索你的文档"""


# Hand the group to discover_commands and keep its return value at module
# level. Per the original comment, this auto-discovers and registers the
# commands found in the commands/ directory.
discovered = discover_commands(kb_search)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Export command functions for backward compatibility with tests
|
|
19
|
+
# These are re-exported from their respective modules
|
|
20
|
+
from winwin_cli.kb_search.commands.search import search
|
|
21
|
+
from winwin_cli.kb_search.commands.list import list_kb
|
|
22
|
+
from winwin_cli.kb_search.commands.index import index
|
|
23
|
+
from winwin_cli.kb_search.commands.add import add
|
|
24
|
+
from winwin_cli.kb_search.commands.remove import remove
|
|
25
|
+
from winwin_cli.kb_search.commands.enable import enable
|
|
26
|
+
from winwin_cli.kb_search.commands.disable import disable
|
|
27
|
+
from winwin_cli.kb_search.commands.status import status
|
|
28
|
+
from winwin_cli.kb_search.commands.info import info
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"kb_search",
|
|
33
|
+
"search",
|
|
34
|
+
"list_kb",
|
|
35
|
+
"index",
|
|
36
|
+
"add",
|
|
37
|
+
"remove",
|
|
38
|
+
"enable",
|
|
39
|
+
"disable",
|
|
40
|
+
"status",
|
|
41
|
+
"info",
|
|
42
|
+
]
|