vertai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vertai-0.1.0/.coverage +0 -0
- vertai-0.1.0/PKG-INFO +289 -0
- vertai-0.1.0/README.md +253 -0
- vertai-0.1.0/ai_sdk/__init__.py +120 -0
- vertai-0.1.0/ai_sdk/core/__init__.py +36 -0
- vertai-0.1.0/ai_sdk/core/llm.py +1010 -0
- vertai-0.1.0/ai_sdk/core/memory.py +374 -0
- vertai-0.1.0/ai_sdk/core/vector.py +645 -0
- vertai-0.1.0/ai_sdk/data/__init__.py +5 -0
- vertai-0.1.0/ai_sdk/data/parser.py +396 -0
- vertai-0.1.0/ai_sdk/local/__init__.py +22 -0
- vertai-0.1.0/ai_sdk/local/models.py +792 -0
- vertai-0.1.0/ai_sdk/output/__init__.py +15 -0
- vertai-0.1.0/ai_sdk/output/docgen.py +404 -0
- vertai-0.1.0/ai_sdk/output/structured.py +525 -0
- vertai-0.1.0/ai_sdk/scenarios/__init__.py +21 -0
- vertai-0.1.0/ai_sdk/scenarios/knowledge_qa.py +539 -0
- vertai-0.1.0/ai_sdk/scenarios/reviewer.py +262 -0
- vertai-0.1.0/ai_sdk/viz/__init__.py +19 -0
- vertai-0.1.0/ai_sdk/viz/dashboard.py +820 -0
- vertai-0.1.0/ai_sdk/workflow/__init__.py +29 -0
- vertai-0.1.0/ai_sdk/workflow/workflow.py +962 -0
- vertai-0.1.0/docs/FUNCTION_DEPENDENCIES.md +252 -0
- vertai-0.1.0/docs/superpowers/specs/2026-06-06-ai-agent-sdk-design.md +207 -0
- vertai-0.1.0/examples/knowledge_qa_demo.py +190 -0
- vertai-0.1.0/examples/quick_start_knowledge_base.py +397 -0
- vertai-0.1.0/pyproject.toml +73 -0
- vertai-0.1.0/pytest.ini +3 -0
- vertai-0.1.0/requirements-dev.txt +1 -0
- vertai-0.1.0/requirements.txt +0 -0
- vertai-0.1.0/sample_docs/machine_learning.txt +26 -0
- vertai-0.1.0/sample_docs/python.txt +16 -0
- vertai-0.1.0/session_user_123.json +30 -0
- vertai-0.1.0/tests/__init__.py +1 -0
- vertai-0.1.0/tests/conftest.py +8 -0
- vertai-0.1.0/tests/test_dashboard.py +561 -0
- vertai-0.1.0/tests/test_deepseek_integration.py +369 -0
- vertai-0.1.0/tests/test_docgen.py +439 -0
- vertai-0.1.0/tests/test_knowledge_qa.py +878 -0
- vertai-0.1.0/tests/test_llm.py +1298 -0
- vertai-0.1.0/tests/test_local_models.py +532 -0
- vertai-0.1.0/tests/test_memory.py +448 -0
- vertai-0.1.0/tests/test_parser.py +631 -0
- vertai-0.1.0/tests/test_reviewer.py +381 -0
- vertai-0.1.0/tests/test_structured.py +1119 -0
- vertai-0.1.0/tests/test_vector.py +1010 -0
- vertai-0.1.0/tests/test_workflow.py +1012 -0
vertai-0.1.0/.coverage
ADDED
|
Binary file
|
vertai-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vertai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 本地优先、渐进式复杂度的 AI 智能体开发 SDK
|
|
5
|
+
Project-URL: Homepage, https://github.com/EnjouZeratul/vertai
|
|
6
|
+
Project-URL: Documentation, https://github.com/EnjouZeratul/vertai#readme
|
|
7
|
+
Author: AI SDK Team
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: httpx>=0.25.0
|
|
18
|
+
Requires-Dist: pydantic>=2.0.0
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
25
|
+
Provides-Extra: doc-parser
|
|
26
|
+
Requires-Dist: openpyxl>=3.1.0; extra == 'doc-parser'
|
|
27
|
+
Requires-Dist: pymupdf>=1.23.0; extra == 'doc-parser'
|
|
28
|
+
Requires-Dist: python-docx>=1.0.0; extra == 'doc-parser'
|
|
29
|
+
Requires-Dist: python-pptx>=0.6.21; extra == 'doc-parser'
|
|
30
|
+
Provides-Extra: embeddings
|
|
31
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == 'embeddings'
|
|
32
|
+
Provides-Extra: production
|
|
33
|
+
Requires-Dist: vertai[doc-parser,embeddings]; extra == 'production'
|
|
34
|
+
Requires-Dist: chromadb>=0.4.0; extra == 'production'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# VertAI
|
|
38
|
+
|
|
39
|
+
垂直领域 AI 智能体开发 SDK,支持完全离线运行。
|
|
40
|
+
|
|
41
|
+
A vertical-domain AI agent development SDK designed for fully offline operation.
|
|
42
|
+
|
|
43
|
+
## 设计理念 | Design Philosophy
|
|
44
|
+
|
|
45
|
+
**模块化架构**:核心功能轻量安装,按需扩展语义能力。
|
|
46
|
+
|
|
47
|
+
**Modular Architecture**: Lightweight core installation with optional semantic capabilities.
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
vertai (核心 ~5MB)
|
|
51
|
+
├── Workflow # 工作流编排
|
|
52
|
+
├── Dashboard # 数据可视化
|
|
53
|
+
├── DocGen # 文档生成
|
|
54
|
+
├── DocParser # 文档解析 (Markdown)
|
|
55
|
+
├── SessionMemory # 会话管理
|
|
56
|
+
└── VectorEngine # 向量存储 (需嵌入模型提供语义能力)
|
|
57
|
+
|
|
58
|
+
可选扩展
|
|
59
|
+
├── [embeddings] # 离线语义搜索
|
|
60
|
+
├── [doc-parser] # 文档解析 (PDF/Word/Excel/PPT)
|
|
61
|
+
└── [production] # 生产环境完整配置
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## 安装 | Installation
|
|
65
|
+
|
|
66
|
+
### 核心安装 | Core Installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install vertai
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 扩展安装 | Optional Extensions
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# 离线语义搜索支持
|
|
76
|
+
# Offline semantic search support
|
|
77
|
+
pip install vertai[embeddings]
|
|
78
|
+
|
|
79
|
+
# 文档解析支持 (PDF/Word/Excel/PPT)
|
|
80
|
+
# Document parsing support (PDF/Word/Excel/PPT)
|
|
81
|
+
pip install vertai[doc-parser]
|
|
82
|
+
|
|
83
|
+
# 完整生产配置
|
|
84
|
+
# Complete production configuration
|
|
85
|
+
pip install vertai[production]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
| 安装选项 | 体积 | 功能 |
|
|
89
|
+
|---------|------|------|
|
|
90
|
+
| 核心 | ~5MB | Workflow, Dashboard, DocGen, Markdown解析 |
|
|
91
|
+
| [embeddings] | ~500MB | 离线语义向量搜索 |
|
|
92
|
+
| [doc-parser] | ~50MB | PDF/Word/Excel/PPT解析 |
|
|
93
|
+
| [production] | ~600MB | 完整生产配置 |
|
|
94
|
+
|
|
95
|
+
| Installation Option | Size | Features |
|
|
96
|
+
|---------------------|------|----------|
|
|
97
|
+
| Core | ~5MB | Workflow, Dashboard, DocGen, Markdown parsing |
|
|
98
|
+
| [embeddings] | ~500MB | Offline semantic vector search |
|
|
99
|
+
| [doc-parser] | ~50MB | PDF/Word/Excel/PPT parsing |
|
|
100
|
+
| [production] | ~600MB | Complete production configuration |
|
|
101
|
+
|
|
102
|
+
## 快速开始 | Quick Start
|
|
103
|
+
|
|
104
|
+
### 工作流编排(完全离线)| Workflow Orchestration (Fully Offline)
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from ai_sdk import Workflow
|
|
108
|
+
|
|
109
|
+
wf = Workflow()
|
|
110
|
+
wf.step("load", lambda ctx: ctx.set("data", [1, 2, 3, 4, 5]))
|
|
111
|
+
wf.step("process", lambda ctx: ctx.set("sum", sum(ctx.get("data"))))
|
|
112
|
+
wf.step("output", lambda ctx: print(f"总和: {ctx.get('sum')}"))
|
|
113
|
+
wf.run()
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 语义向量搜索(需安装 embeddings)| Semantic Vector Search (requires embeddings)
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from ai_sdk import VectorEngine, Document
|
|
120
|
+
from sentence_transformers import SentenceTransformer
|
|
121
|
+
|
|
122
|
+
# 加载嵌入模型(首次下载约100MB,之后离线可用)
|
|
123
|
+
# Load embedding model (~100MB first download, then works offline)
|
|
124
|
+
model = SentenceTransformer('BAAI/bge-small-zh-v1.5')
|
|
125
|
+
|
|
126
|
+
def embedding_fn(text):
|
|
127
|
+
return model.encode(text).tolist()
|
|
128
|
+
|
|
129
|
+
# 创建向量引擎
|
|
130
|
+
# Create vector engine
|
|
131
|
+
engine = VectorEngine(store_type="memory", embedding_fn=embedding_fn)
|
|
132
|
+
|
|
133
|
+
# 索引文档
|
|
134
|
+
# Index documents
|
|
135
|
+
engine.index_documents([
|
|
136
|
+
Document(content="Python是一种编程语言,由Guido van Rossum创建"),
|
|
137
|
+
Document(content="机器学习是人工智能的子领域"),
|
|
138
|
+
Document(content="深度学习使用多层神经网络"),
|
|
139
|
+
])
|
|
140
|
+
|
|
141
|
+
# 语义搜索
|
|
142
|
+
# Semantic search
|
|
143
|
+
results = engine.search("编程语言")
|
|
144
|
+
# 返回:Python是一种编程语言...(语义匹配,非关键词匹配)
|
|
145
|
+
# Returns: Python是一种编程语言... (semantic match, not keyword match)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### LLM 对话(需配置 API)| LLM Chat (requires API configuration)
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from ai_sdk import LLMEngine, LLMConfig, ModelProvider
|
|
152
|
+
|
|
153
|
+
config = LLMConfig(
|
|
154
|
+
provider=ModelProvider.DEEPSEEK,
|
|
155
|
+
base_url="https://api.deepseek.com/anthropic",
|
|
156
|
+
api_key="sk-xxx", # 或设置环境变量 AI_SDK_API_KEY
|
|
157
|
+
model="deepseek-v4-flash",
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
llm = LLMEngine(config)
|
|
161
|
+
|
|
162
|
+
# 单次生成
|
|
163
|
+
# Single generation
|
|
164
|
+
result = llm.generate("你好")
|
|
165
|
+
|
|
166
|
+
# 流式输出
|
|
167
|
+
# Streaming output
|
|
168
|
+
for chunk in llm.stream("讲个故事"):
|
|
169
|
+
print(chunk, end="", flush=True)
|
|
170
|
+
|
|
171
|
+
# 多轮对话
|
|
172
|
+
# Multi-turn conversation
|
|
173
|
+
messages = [
|
|
174
|
+
{"role": "user", "content": "我叫小明"},
|
|
175
|
+
{"role": "assistant", "content": "你好小明!"},
|
|
176
|
+
{"role": "user", "content": "我叫什么名字?"},
|
|
177
|
+
]
|
|
178
|
+
result = llm.chat(messages)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### 结构化数据提取 | Structured Data Extraction
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from ai_sdk import StructuredOutput
|
|
185
|
+
|
|
186
|
+
schema = {"name": "string", "amount": "number"}
|
|
187
|
+
|
|
188
|
+
# 正则模式(完全离线,简单模式)
|
|
189
|
+
# Regex mode (fully offline, simple patterns)
|
|
190
|
+
output = StructuredOutput(schema)
|
|
191
|
+
result = output.extract("张三报销500元")
|
|
192
|
+
# {'name': '张三', 'amount': 500.0}
|
|
193
|
+
|
|
194
|
+
# LLM模式(需配置API,语义理解)
|
|
195
|
+
# LLM mode (requires API, semantic understanding)
|
|
196
|
+
from ai_sdk import LLMEngine, LLMConfig, ModelProvider
|
|
197
|
+
llm = LLMEngine(LLMConfig(
|
|
198
|
+
provider=ModelProvider.DEEPSEEK,
|
|
199
|
+
base_url="https://api.deepseek.com/anthropic",
|
|
200
|
+
api_key="sk-xxx",
|
|
201
|
+
))
|
|
202
|
+
output = StructuredOutput(schema, llm=llm)
|
|
203
|
+
result = output.extract("李四消费了三百块")
|
|
204
|
+
# {'name': '李四', 'amount': 300.0}(语义理解中文数字)
|
|
205
|
+
# {'name': '李四', 'amount': 300.0} (semantic understanding of Chinese numbers)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## 功能模块 | Feature Modules
|
|
209
|
+
|
|
210
|
+
| 模块 | 离线可用 | 依赖 |
|
|
211
|
+
|------|---------|------|
|
|
212
|
+
| Workflow | ✅ | 无 |
|
|
213
|
+
| Dashboard | ✅ | 无 |
|
|
214
|
+
| DocGen (Markdown/HTML) | ✅ | 无 |
|
|
215
|
+
| DocParser (Markdown) | ✅ | 无 |
|
|
216
|
+
| SessionMemory | ✅ | 无 |
|
|
217
|
+
| VectorEngine (存储) | ✅ | 无 |
|
|
218
|
+
| VectorEngine (语义搜索) | ✅ | sentence-transformers |
|
|
219
|
+
| StructuredOutput (正则) | ✅ | 无 |
|
|
220
|
+
| StructuredOutput (语义) | ❌ | LLM API |
|
|
221
|
+
| LLMEngine | ❌ | LLM API 或 Ollama |
|
|
222
|
+
| KnowledgeQA | ✅ | 向量搜索离线,生成需LLM |
|
|
223
|
+
| LocalModelManager | ✅ | 模型文件本地存储 |
|
|
224
|
+
|
|
225
|
+
| Module | Offline | Dependencies |
|
|
226
|
+
|--------|---------|--------------|
|
|
227
|
+
| Workflow | ✅ | None |
|
|
228
|
+
| Dashboard | ✅ | None |
|
|
229
|
+
| DocGen (Markdown/HTML) | ✅ | None |
|
|
230
|
+
| DocParser (Markdown) | ✅ | None |
|
|
231
|
+
| SessionMemory | ✅ | None |
|
|
232
|
+
| VectorEngine (Storage) | ✅ | None |
|
|
233
|
+
| VectorEngine (Semantic Search) | ✅ | sentence-transformers |
|
|
234
|
+
| StructuredOutput (Regex) | ✅ | None |
|
|
235
|
+
| StructuredOutput (Semantic) | ❌ | LLM API |
|
|
236
|
+
| LLMEngine | ❌ | LLM API or Ollama |
|
|
237
|
+
| KnowledgeQA | ✅ | Vector search offline, generation needs LLM |
|
|
238
|
+
| LocalModelManager | ✅ | Local model file storage |
|
|
239
|
+
|
|
240
|
+
## 本地模型 | Local Models
|
|
241
|
+
|
|
242
|
+
### 嵌入模型 | Embedding Models
|
|
243
|
+
|
|
244
|
+
| 模型 | 体积 | 语言 | 离线 |
|
|
245
|
+
|------|------|------|------|
|
|
246
|
+
| bge-small-zh-v1.5 | 100MB | 中文 | ✅ |
|
|
247
|
+
| bge-large-zh-v1.5 | 650MB | 中文 | ✅ |
|
|
248
|
+
| all-MiniLM-L6-v2 | 80MB | 英文 | ✅ |
|
|
249
|
+
|
|
250
|
+
| Model | Size | Language | Offline |
|
|
251
|
+
|-------|------|----------|---------|
|
|
252
|
+
| bge-small-zh-v1.5 | 100MB | Chinese | ✅ |
|
|
253
|
+
| bge-large-zh-v1.5 | 650MB | Chinese | ✅ |
|
|
254
|
+
| all-MiniLM-L6-v2 | 80MB | English | ✅ |
|
|
255
|
+
|
|
256
|
+
### 语音模型 | Speech Models
|
|
257
|
+
|
|
258
|
+
| 模型 | 体积 | 最低配置 | 离线 |
|
|
259
|
+
|------|------|---------|------|
|
|
260
|
+
| whisper-tiny | 75MB | 1GB RAM | ✅ |
|
|
261
|
+
| whisper-small | 466MB | 2GB RAM | ✅ |
|
|
262
|
+
| whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
|
|
263
|
+
|
|
264
|
+
| Model | Size | Min Requirements | Offline |
|
|
265
|
+
|-------|------|------------------|---------|
|
|
266
|
+
| whisper-tiny | 75MB | 1GB RAM | ✅ |
|
|
267
|
+
| whisper-small | 466MB | 2GB RAM | ✅ |
|
|
268
|
+
| whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
|
|
269
|
+
|
|
270
|
+
```python
|
|
271
|
+
from ai_sdk import LocalModelManager
|
|
272
|
+
|
|
273
|
+
manager = LocalModelManager()
|
|
274
|
+
manager.download("bge-small-zh-v1.5") # 首次下载 | First download
|
|
275
|
+
model = manager.load("bge-small-zh-v1.5") # 之后离线加载 | Then load offline
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## 测试 | Testing
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
pip install vertai[dev]
|
|
282
|
+
python -m pytest tests/ -v --cov=ai_sdk
|
|
283
|
+
|
|
284
|
+
# 642 passed, 20 skipped, 94% coverage
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## 许可证 | License
|
|
288
|
+
|
|
289
|
+
MIT
|
vertai-0.1.0/README.md
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# VertAI
|
|
2
|
+
|
|
3
|
+
垂直领域 AI 智能体开发 SDK,支持完全离线运行。
|
|
4
|
+
|
|
5
|
+
A vertical-domain AI agent development SDK designed for fully offline operation.
|
|
6
|
+
|
|
7
|
+
## 设计理念 | Design Philosophy
|
|
8
|
+
|
|
9
|
+
**模块化架构**:核心功能轻量安装,按需扩展语义能力。
|
|
10
|
+
|
|
11
|
+
**Modular Architecture**: Lightweight core installation with optional semantic capabilities.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
vertai (核心 ~5MB)
|
|
15
|
+
├── Workflow # 工作流编排
|
|
16
|
+
├── Dashboard # 数据可视化
|
|
17
|
+
├── DocGen # 文档生成
|
|
18
|
+
├── DocParser # 文档解析 (Markdown)
|
|
19
|
+
├── SessionMemory # 会话管理
|
|
20
|
+
└── VectorEngine # 向量存储 (需嵌入模型提供语义能力)
|
|
21
|
+
|
|
22
|
+
可选扩展
|
|
23
|
+
├── [embeddings] # 离线语义搜索
|
|
24
|
+
├── [doc-parser] # 文档解析 (PDF/Word/Excel/PPT)
|
|
25
|
+
└── [production] # 生产环境完整配置
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## 安装 | Installation
|
|
29
|
+
|
|
30
|
+
### 核心安装 | Core Installation
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install vertai
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### 扩展安装 | Optional Extensions
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# 离线语义搜索支持
|
|
40
|
+
# Offline semantic search support
|
|
41
|
+
pip install vertai[embeddings]
|
|
42
|
+
|
|
43
|
+
# 文档解析支持 (PDF/Word/Excel/PPT)
|
|
44
|
+
# Document parsing support (PDF/Word/Excel/PPT)
|
|
45
|
+
pip install vertai[doc-parser]
|
|
46
|
+
|
|
47
|
+
# 完整生产配置
|
|
48
|
+
# Complete production configuration
|
|
49
|
+
pip install vertai[production]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
| 安装选项 | 体积 | 功能 |
|
|
53
|
+
|---------|------|------|
|
|
54
|
+
| 核心 | ~5MB | Workflow, Dashboard, DocGen, Markdown解析 |
|
|
55
|
+
| [embeddings] | ~500MB | 离线语义向量搜索 |
|
|
56
|
+
| [doc-parser] | ~50MB | PDF/Word/Excel/PPT解析 |
|
|
57
|
+
| [production] | ~600MB | 完整生产配置 |
|
|
58
|
+
|
|
59
|
+
| Installation Option | Size | Features |
|
|
60
|
+
|---------------------|------|----------|
|
|
61
|
+
| Core | ~5MB | Workflow, Dashboard, DocGen, Markdown parsing |
|
|
62
|
+
| [embeddings] | ~500MB | Offline semantic vector search |
|
|
63
|
+
| [doc-parser] | ~50MB | PDF/Word/Excel/PPT parsing |
|
|
64
|
+
| [production] | ~600MB | Complete production configuration |
|
|
65
|
+
|
|
66
|
+
## 快速开始 | Quick Start
|
|
67
|
+
|
|
68
|
+
### 工作流编排(完全离线)| Workflow Orchestration (Fully Offline)
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from ai_sdk import Workflow
|
|
72
|
+
|
|
73
|
+
wf = Workflow()
|
|
74
|
+
wf.step("load", lambda ctx: ctx.set("data", [1, 2, 3, 4, 5]))
|
|
75
|
+
wf.step("process", lambda ctx: ctx.set("sum", sum(ctx.get("data"))))
|
|
76
|
+
wf.step("output", lambda ctx: print(f"总和: {ctx.get('sum')}"))
|
|
77
|
+
wf.run()
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 语义向量搜索(需安装 embeddings)| Semantic Vector Search (requires embeddings)
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from ai_sdk import VectorEngine, Document
|
|
84
|
+
from sentence_transformers import SentenceTransformer
|
|
85
|
+
|
|
86
|
+
# 加载嵌入模型(首次下载约100MB,之后离线可用)
|
|
87
|
+
# Load embedding model (~100MB first download, then works offline)
|
|
88
|
+
model = SentenceTransformer('BAAI/bge-small-zh-v1.5')
|
|
89
|
+
|
|
90
|
+
def embedding_fn(text):
|
|
91
|
+
return model.encode(text).tolist()
|
|
92
|
+
|
|
93
|
+
# 创建向量引擎
|
|
94
|
+
# Create vector engine
|
|
95
|
+
engine = VectorEngine(store_type="memory", embedding_fn=embedding_fn)
|
|
96
|
+
|
|
97
|
+
# 索引文档
|
|
98
|
+
# Index documents
|
|
99
|
+
engine.index_documents([
|
|
100
|
+
Document(content="Python是一种编程语言,由Guido van Rossum创建"),
|
|
101
|
+
Document(content="机器学习是人工智能的子领域"),
|
|
102
|
+
Document(content="深度学习使用多层神经网络"),
|
|
103
|
+
])
|
|
104
|
+
|
|
105
|
+
# 语义搜索
|
|
106
|
+
# Semantic search
|
|
107
|
+
results = engine.search("编程语言")
|
|
108
|
+
# 返回:Python是一种编程语言...(语义匹配,非关键词匹配)
|
|
109
|
+
# Returns: Python是一种编程语言... (semantic match, not keyword match)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### LLM 对话(需配置 API)| LLM Chat (requires API configuration)
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from ai_sdk import LLMEngine, LLMConfig, ModelProvider
|
|
116
|
+
|
|
117
|
+
config = LLMConfig(
|
|
118
|
+
provider=ModelProvider.DEEPSEEK,
|
|
119
|
+
base_url="https://api.deepseek.com/anthropic",
|
|
120
|
+
api_key="sk-xxx", # 或设置环境变量 AI_SDK_API_KEY
|
|
121
|
+
model="deepseek-v4-flash",
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
llm = LLMEngine(config)
|
|
125
|
+
|
|
126
|
+
# 单次生成
|
|
127
|
+
# Single generation
|
|
128
|
+
result = llm.generate("你好")
|
|
129
|
+
|
|
130
|
+
# 流式输出
|
|
131
|
+
# Streaming output
|
|
132
|
+
for chunk in llm.stream("讲个故事"):
|
|
133
|
+
print(chunk, end="", flush=True)
|
|
134
|
+
|
|
135
|
+
# 多轮对话
|
|
136
|
+
# Multi-turn conversation
|
|
137
|
+
messages = [
|
|
138
|
+
{"role": "user", "content": "我叫小明"},
|
|
139
|
+
{"role": "assistant", "content": "你好小明!"},
|
|
140
|
+
{"role": "user", "content": "我叫什么名字?"},
|
|
141
|
+
]
|
|
142
|
+
result = llm.chat(messages)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### 结构化数据提取 | Structured Data Extraction
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from ai_sdk import StructuredOutput
|
|
149
|
+
|
|
150
|
+
schema = {"name": "string", "amount": "number"}
|
|
151
|
+
|
|
152
|
+
# 正则模式(完全离线,简单模式)
|
|
153
|
+
# Regex mode (fully offline, simple patterns)
|
|
154
|
+
output = StructuredOutput(schema)
|
|
155
|
+
result = output.extract("张三报销500元")
|
|
156
|
+
# {'name': '张三', 'amount': 500.0}
|
|
157
|
+
|
|
158
|
+
# LLM模式(需配置API,语义理解)
|
|
159
|
+
# LLM mode (requires API, semantic understanding)
|
|
160
|
+
from ai_sdk import LLMEngine, LLMConfig, ModelProvider
|
|
161
|
+
llm = LLMEngine(LLMConfig(
|
|
162
|
+
provider=ModelProvider.DEEPSEEK,
|
|
163
|
+
base_url="https://api.deepseek.com/anthropic",
|
|
164
|
+
api_key="sk-xxx",
|
|
165
|
+
))
|
|
166
|
+
output = StructuredOutput(schema, llm=llm)
|
|
167
|
+
result = output.extract("李四消费了三百块")
|
|
168
|
+
# {'name': '李四', 'amount': 300.0}(语义理解中文数字)
|
|
169
|
+
# {'name': '李四', 'amount': 300.0} (semantic understanding of Chinese numbers)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## 功能模块 | Feature Modules
|
|
173
|
+
|
|
174
|
+
| 模块 | 离线可用 | 依赖 |
|
|
175
|
+
|------|---------|------|
|
|
176
|
+
| Workflow | ✅ | 无 |
|
|
177
|
+
| Dashboard | ✅ | 无 |
|
|
178
|
+
| DocGen (Markdown/HTML) | ✅ | 无 |
|
|
179
|
+
| DocParser (Markdown) | ✅ | 无 |
|
|
180
|
+
| SessionMemory | ✅ | 无 |
|
|
181
|
+
| VectorEngine (存储) | ✅ | 无 |
|
|
182
|
+
| VectorEngine (语义搜索) | ✅ | sentence-transformers |
|
|
183
|
+
| StructuredOutput (正则) | ✅ | 无 |
|
|
184
|
+
| StructuredOutput (语义) | ❌ | LLM API |
|
|
185
|
+
| LLMEngine | ❌ | LLM API 或 Ollama |
|
|
186
|
+
| KnowledgeQA | ✅ | 向量搜索离线,生成需LLM |
|
|
187
|
+
| LocalModelManager | ✅ | 模型文件本地存储 |
|
|
188
|
+
|
|
189
|
+
| Module | Offline | Dependencies |
|
|
190
|
+
|--------|---------|--------------|
|
|
191
|
+
| Workflow | ✅ | None |
|
|
192
|
+
| Dashboard | ✅ | None |
|
|
193
|
+
| DocGen (Markdown/HTML) | ✅ | None |
|
|
194
|
+
| DocParser (Markdown) | ✅ | None |
|
|
195
|
+
| SessionMemory | ✅ | None |
|
|
196
|
+
| VectorEngine (Storage) | ✅ | None |
|
|
197
|
+
| VectorEngine (Semantic Search) | ✅ | sentence-transformers |
|
|
198
|
+
| StructuredOutput (Regex) | ✅ | None |
|
|
199
|
+
| StructuredOutput (Semantic) | ❌ | LLM API |
|
|
200
|
+
| LLMEngine | ❌ | LLM API or Ollama |
|
|
201
|
+
| KnowledgeQA | ✅ | Vector search offline, generation needs LLM |
|
|
202
|
+
| LocalModelManager | ✅ | Local model file storage |
|
|
203
|
+
|
|
204
|
+
## 本地模型 | Local Models
|
|
205
|
+
|
|
206
|
+
### 嵌入模型 | Embedding Models
|
|
207
|
+
|
|
208
|
+
| 模型 | 体积 | 语言 | 离线 |
|
|
209
|
+
|------|------|------|------|
|
|
210
|
+
| bge-small-zh-v1.5 | 100MB | 中文 | ✅ |
|
|
211
|
+
| bge-large-zh-v1.5 | 650MB | 中文 | ✅ |
|
|
212
|
+
| all-MiniLM-L6-v2 | 80MB | 英文 | ✅ |
|
|
213
|
+
|
|
214
|
+
| Model | Size | Language | Offline |
|
|
215
|
+
|-------|------|----------|---------|
|
|
216
|
+
| bge-small-zh-v1.5 | 100MB | Chinese | ✅ |
|
|
217
|
+
| bge-large-zh-v1.5 | 650MB | Chinese | ✅ |
|
|
218
|
+
| all-MiniLM-L6-v2 | 80MB | English | ✅ |
|
|
219
|
+
|
|
220
|
+
### 语音模型 | Speech Models
|
|
221
|
+
|
|
222
|
+
| 模型 | 体积 | 最低配置 | 离线 |
|
|
223
|
+
|------|------|---------|------|
|
|
224
|
+
| whisper-tiny | 75MB | 1GB RAM | ✅ |
|
|
225
|
+
| whisper-small | 466MB | 2GB RAM | ✅ |
|
|
226
|
+
| whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
|
|
227
|
+
|
|
228
|
+
| Model | Size | Min Requirements | Offline |
|
|
229
|
+
|-------|------|------------------|---------|
|
|
230
|
+
| whisper-tiny | 75MB | 1GB RAM | ✅ |
|
|
231
|
+
| whisper-small | 466MB | 2GB RAM | ✅ |
|
|
232
|
+
| whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
from ai_sdk import LocalModelManager
|
|
236
|
+
|
|
237
|
+
manager = LocalModelManager()
|
|
238
|
+
manager.download("bge-small-zh-v1.5") # 首次下载 | First download
|
|
239
|
+
model = manager.load("bge-small-zh-v1.5") # 之后离线加载 | Then load offline
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## 测试 | Testing
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
pip install vertai[dev]
|
|
246
|
+
python -m pytest tests/ -v --cov=ai_sdk
|
|
247
|
+
|
|
248
|
+
# 642 passed, 20 skipped, 94% coverage
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## 许可证 | License
|
|
252
|
+
|
|
253
|
+
MIT
|
vertai-0.1.0/ai_sdk/__init__.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""VertAI - 垂直领域 AI 智能体开发 SDK"""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from ai_sdk.core.llm import (
|
|
6
|
+
LLMEngine,
|
|
7
|
+
LLMConfig,
|
|
8
|
+
ModelProvider,
|
|
9
|
+
ChatMessage,
|
|
10
|
+
GenerateResult,
|
|
11
|
+
)
|
|
12
|
+
from ai_sdk.core.memory import (
|
|
13
|
+
Message,
|
|
14
|
+
SessionConfig,
|
|
15
|
+
SessionMemory,
|
|
16
|
+
)
|
|
17
|
+
from ai_sdk.core.vector import (
|
|
18
|
+
Document,
|
|
19
|
+
VectorEngine,
|
|
20
|
+
VectorConfig,
|
|
21
|
+
SearchResult,
|
|
22
|
+
)
|
|
23
|
+
from ai_sdk.data.parser import DocParser
|
|
24
|
+
from ai_sdk.output.structured import StructuredOutput
|
|
25
|
+
from ai_sdk.output.docgen import DocGen
|
|
26
|
+
from ai_sdk.scenarios.reviewer import Reviewer, ReviewerConfig, ReviewResult
|
|
27
|
+
from ai_sdk.scenarios.knowledge_qa import (
|
|
28
|
+
KnowledgeQA,
|
|
29
|
+
KnowledgeQAConfig,
|
|
30
|
+
AnswerResult,
|
|
31
|
+
SourceReference,
|
|
32
|
+
)
|
|
33
|
+
from ai_sdk.viz.dashboard import (
|
|
34
|
+
Dashboard,
|
|
35
|
+
DashboardTheme,
|
|
36
|
+
Metric,
|
|
37
|
+
Chart,
|
|
38
|
+
ChartType,
|
|
39
|
+
ChartConfig,
|
|
40
|
+
)
|
|
41
|
+
from ai_sdk.workflow import (
|
|
42
|
+
Workflow,
|
|
43
|
+
WorkflowConfig,
|
|
44
|
+
WorkflowContext,
|
|
45
|
+
WorkflowResult,
|
|
46
|
+
StepResult,
|
|
47
|
+
StepStatus,
|
|
48
|
+
Step,
|
|
49
|
+
StepType,
|
|
50
|
+
ParallelConfig,
|
|
51
|
+
LoopConfig,
|
|
52
|
+
LoopType,
|
|
53
|
+
)
|
|
54
|
+
from ai_sdk.local import (
|
|
55
|
+
LocalModelManager,
|
|
56
|
+
LocalModelConfig,
|
|
57
|
+
ModelCategory,
|
|
58
|
+
ModelInfo,
|
|
59
|
+
WhisperModel,
|
|
60
|
+
EmbeddingModel,
|
|
61
|
+
)
|
|
62
|
+
from ai_sdk.local.models import check_hardware_requirements
|
|
63
|
+
|
|
64
|
+
__all__ = [
|
|
65
|
+
# LLM
|
|
66
|
+
"LLMEngine",
|
|
67
|
+
"LLMConfig",
|
|
68
|
+
"ModelProvider",
|
|
69
|
+
"ChatMessage",
|
|
70
|
+
"GenerateResult",
|
|
71
|
+
# Memory
|
|
72
|
+
"Message",
|
|
73
|
+
"SessionConfig",
|
|
74
|
+
"SessionMemory",
|
|
75
|
+
# Vector
|
|
76
|
+
"Document",
|
|
77
|
+
"VectorEngine",
|
|
78
|
+
"VectorConfig",
|
|
79
|
+
"SearchResult",
|
|
80
|
+
# Data
|
|
81
|
+
"DocParser",
|
|
82
|
+
# Output
|
|
83
|
+
"StructuredOutput",
|
|
84
|
+
"DocGen",
|
|
85
|
+
# Scenarios
|
|
86
|
+
"Reviewer",
|
|
87
|
+
"ReviewerConfig",
|
|
88
|
+
"ReviewResult",
|
|
89
|
+
"KnowledgeQA",
|
|
90
|
+
"KnowledgeQAConfig",
|
|
91
|
+
"AnswerResult",
|
|
92
|
+
"SourceReference",
|
|
93
|
+
# Viz
|
|
94
|
+
"Dashboard",
|
|
95
|
+
"DashboardTheme",
|
|
96
|
+
"Metric",
|
|
97
|
+
"Chart",
|
|
98
|
+
"ChartType",
|
|
99
|
+
"ChartConfig",
|
|
100
|
+
# Workflow
|
|
101
|
+
"Workflow",
|
|
102
|
+
"WorkflowConfig",
|
|
103
|
+
"WorkflowContext",
|
|
104
|
+
"WorkflowResult",
|
|
105
|
+
"StepResult",
|
|
106
|
+
"StepStatus",
|
|
107
|
+
"Step",
|
|
108
|
+
"StepType",
|
|
109
|
+
"ParallelConfig",
|
|
110
|
+
"LoopConfig",
|
|
111
|
+
"LoopType",
|
|
112
|
+
# Local Models
|
|
113
|
+
"LocalModelManager",
|
|
114
|
+
"LocalModelConfig",
|
|
115
|
+
"ModelCategory",
|
|
116
|
+
"ModelInfo",
|
|
117
|
+
"WhisperModel",
|
|
118
|
+
"EmbeddingModel",
|
|
119
|
+
"check_hardware_requirements",
|
|
120
|
+
]
|
vertai-0.1.0/ai_sdk/core/__init__.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""核心模块 - LLM引擎、向量引擎、记忆引擎、工具引擎"""
|
|
2
|
+
|
|
3
|
+
from ai_sdk.core.llm import LLMEngine, LLMConfig
|
|
4
|
+
from ai_sdk.core.memory import (
|
|
5
|
+
Message,
|
|
6
|
+
SessionConfig,
|
|
7
|
+
SessionMemory,
|
|
8
|
+
)
|
|
9
|
+
from ai_sdk.core.vector import (
|
|
10
|
+
Document,
|
|
11
|
+
SearchResult,
|
|
12
|
+
VectorConfig,
|
|
13
|
+
VectorEngine,
|
|
14
|
+
VectorStore,
|
|
15
|
+
InMemoryVectorStore,
|
|
16
|
+
ChromaVectorStore,
|
|
17
|
+
FAISSVectorStore,
|
|
18
|
+
EmbeddingEngine,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"LLMEngine",
|
|
23
|
+
"LLMConfig",
|
|
24
|
+
"Message",
|
|
25
|
+
"SessionConfig",
|
|
26
|
+
"SessionMemory",
|
|
27
|
+
"Document",
|
|
28
|
+
"SearchResult",
|
|
29
|
+
"VectorConfig",
|
|
30
|
+
"VectorEngine",
|
|
31
|
+
"VectorStore",
|
|
32
|
+
"InMemoryVectorStore",
|
|
33
|
+
"ChromaVectorStore",
|
|
34
|
+
"FAISSVectorStore",
|
|
35
|
+
"EmbeddingEngine",
|
|
36
|
+
]
|