vertai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. vertai-0.1.0/.coverage +0 -0
  2. vertai-0.1.0/PKG-INFO +289 -0
  3. vertai-0.1.0/README.md +253 -0
  4. vertai-0.1.0/ai_sdk/__init__.py +120 -0
  5. vertai-0.1.0/ai_sdk/core/__init__.py +36 -0
  6. vertai-0.1.0/ai_sdk/core/llm.py +1010 -0
  7. vertai-0.1.0/ai_sdk/core/memory.py +374 -0
  8. vertai-0.1.0/ai_sdk/core/vector.py +645 -0
  9. vertai-0.1.0/ai_sdk/data/__init__.py +5 -0
  10. vertai-0.1.0/ai_sdk/data/parser.py +396 -0
  11. vertai-0.1.0/ai_sdk/local/__init__.py +22 -0
  12. vertai-0.1.0/ai_sdk/local/models.py +792 -0
  13. vertai-0.1.0/ai_sdk/output/__init__.py +15 -0
  14. vertai-0.1.0/ai_sdk/output/docgen.py +404 -0
  15. vertai-0.1.0/ai_sdk/output/structured.py +525 -0
  16. vertai-0.1.0/ai_sdk/scenarios/__init__.py +21 -0
  17. vertai-0.1.0/ai_sdk/scenarios/knowledge_qa.py +539 -0
  18. vertai-0.1.0/ai_sdk/scenarios/reviewer.py +262 -0
  19. vertai-0.1.0/ai_sdk/viz/__init__.py +19 -0
  20. vertai-0.1.0/ai_sdk/viz/dashboard.py +820 -0
  21. vertai-0.1.0/ai_sdk/workflow/__init__.py +29 -0
  22. vertai-0.1.0/ai_sdk/workflow/workflow.py +962 -0
  23. vertai-0.1.0/docs/FUNCTION_DEPENDENCIES.md +252 -0
  24. vertai-0.1.0/docs/superpowers/specs/2026-06-06-ai-agent-sdk-design.md +207 -0
  25. vertai-0.1.0/examples/knowledge_qa_demo.py +190 -0
  26. vertai-0.1.0/examples/quick_start_knowledge_base.py +397 -0
  27. vertai-0.1.0/pyproject.toml +73 -0
  28. vertai-0.1.0/pytest.ini +3 -0
  29. vertai-0.1.0/requirements-dev.txt +1 -0
  30. vertai-0.1.0/requirements.txt +0 -0
  31. vertai-0.1.0/sample_docs/machine_learning.txt +26 -0
  32. vertai-0.1.0/sample_docs/python.txt +16 -0
  33. vertai-0.1.0/session_user_123.json +30 -0
  34. vertai-0.1.0/tests/__init__.py +1 -0
  35. vertai-0.1.0/tests/conftest.py +8 -0
  36. vertai-0.1.0/tests/test_dashboard.py +561 -0
  37. vertai-0.1.0/tests/test_deepseek_integration.py +369 -0
  38. vertai-0.1.0/tests/test_docgen.py +439 -0
  39. vertai-0.1.0/tests/test_knowledge_qa.py +878 -0
  40. vertai-0.1.0/tests/test_llm.py +1298 -0
  41. vertai-0.1.0/tests/test_local_models.py +532 -0
  42. vertai-0.1.0/tests/test_memory.py +448 -0
  43. vertai-0.1.0/tests/test_parser.py +631 -0
  44. vertai-0.1.0/tests/test_reviewer.py +381 -0
  45. vertai-0.1.0/tests/test_structured.py +1119 -0
  46. vertai-0.1.0/tests/test_vector.py +1010 -0
  47. vertai-0.1.0/tests/test_workflow.py +1012 -0
vertai-0.1.0/.coverage ADDED
Binary file
vertai-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,289 @@
1
+ Metadata-Version: 2.4
2
+ Name: vertai
3
+ Version: 0.1.0
4
+ Summary: 本地优先、渐进式复杂度的 AI 智能体开发 SDK
5
+ Project-URL: Homepage, https://github.com/EnjouZeratul/vertai
6
+ Project-URL: Documentation, https://github.com/EnjouZeratul/vertai#readme
7
+ Author: AI SDK Team
8
+ License-Expression: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: httpx>=0.25.0
18
+ Requires-Dist: pydantic>=2.0.0
19
+ Provides-Extra: dev
20
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
21
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
22
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
23
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
24
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
25
+ Provides-Extra: doc-parser
26
+ Requires-Dist: openpyxl>=3.1.0; extra == 'doc-parser'
27
+ Requires-Dist: pymupdf>=1.23.0; extra == 'doc-parser'
28
+ Requires-Dist: python-docx>=1.0.0; extra == 'doc-parser'
29
+ Requires-Dist: python-pptx>=0.6.21; extra == 'doc-parser'
30
+ Provides-Extra: embeddings
31
+ Requires-Dist: sentence-transformers>=2.0.0; extra == 'embeddings'
32
+ Provides-Extra: production
33
+ Requires-Dist: vertai[doc-parser,embeddings]; extra == 'production'
34
+ Requires-Dist: chromadb>=0.4.0; extra == 'production'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # VertAI
38
+
39
+ 垂直领域 AI 智能体开发 SDK,支持完全离线运行。
40
+
41
+ A vertical-domain AI agent development SDK designed for fully offline operation.
42
+
43
+ ## 设计理念 | Design Philosophy
44
+
45
+ **模块化架构**:核心功能轻量安装,按需扩展语义能力。
46
+
47
+ **Modular Architecture**: Lightweight core installation with optional semantic capabilities.
48
+
49
+ ```
50
+ vertai (核心 ~5MB)
51
+ ├── Workflow # 工作流编排
52
+ ├── Dashboard # 数据可视化
53
+ ├── DocGen # 文档生成
54
+ ├── DocParser # 文档解析 (Markdown)
55
+ ├── SessionMemory # 会话管理
56
+ └── VectorEngine # 向量存储 (需嵌入模型提供语义能力)
57
+
58
+ 可选扩展
59
+ ├── [embeddings] # 离线语义搜索
60
+ ├── [doc-parser] # 文档解析 (PDF/Word/Excel/PPT)
61
+ └── [production] # 生产环境完整配置
62
+ ```
63
+
64
+ ## 安装 | Installation
65
+
66
+ ### 核心安装 | Core Installation
67
+
68
+ ```bash
69
+ pip install vertai
70
+ ```
71
+
72
+ ### 扩展安装 | Optional Extensions
73
+
74
+ ```bash
75
+ # 离线语义搜索支持
76
+ # Offline semantic search support
77
+ pip install vertai[embeddings]
78
+
79
+ # 文档解析支持 (PDF/Word/Excel/PPT)
80
+ # Document parsing support (PDF/Word/Excel/PPT)
81
+ pip install vertai[doc-parser]
82
+
83
+ # 完整生产配置
84
+ # Complete production configuration
85
+ pip install vertai[production]
86
+ ```
87
+
88
+ | 安装选项 | 体积 | 功能 |
89
+ |---------|------|------|
90
+ | 核心 | ~5MB | Workflow, Dashboard, DocGen, Markdown解析 |
91
+ | [embeddings] | ~500MB | 离线语义向量搜索 |
92
+ | [doc-parser] | ~50MB | PDF/Word/Excel/PPT解析 |
93
+ | [production] | ~600MB | 完整生产配置 |
94
+
95
+ | Installation Option | Size | Features |
96
+ |---------------------|------|----------|
97
+ | Core | ~5MB | Workflow, Dashboard, DocGen, Markdown parsing |
98
+ | [embeddings] | ~500MB | Offline semantic vector search |
99
+ | [doc-parser] | ~50MB | PDF/Word/Excel/PPT parsing |
100
+ | [production] | ~600MB | Complete production configuration |
101
+
102
+ ## 快速开始 | Quick Start
103
+
104
+ ### 工作流编排(完全离线)| Workflow Orchestration (Fully Offline)
105
+
106
+ ```python
107
+ from ai_sdk import Workflow
108
+
109
+ wf = Workflow()
110
+ wf.step("load", lambda ctx: ctx.set("data", [1, 2, 3, 4, 5]))
111
+ wf.step("process", lambda ctx: ctx.set("sum", sum(ctx.get("data"))))
112
+ wf.step("output", lambda ctx: print(f"总和: {ctx.get('sum')}"))
113
+ wf.run()
114
+ ```
115
+
116
+ ### 语义向量搜索(需安装 embeddings)| Semantic Vector Search (requires embeddings)
117
+
118
+ ```python
119
+ from ai_sdk import VectorEngine, Document
120
+ from sentence_transformers import SentenceTransformer
121
+
122
+ # 加载嵌入模型(首次下载约100MB,之后离线可用)
123
+ # Load embedding model (~100MB first download, then works offline)
124
+ model = SentenceTransformer('BAAI/bge-small-zh-v1.5')
125
+
126
+ def embedding_fn(text):
127
+ return model.encode(text).tolist()
128
+
129
+ # 创建向量引擎
130
+ # Create vector engine
131
+ engine = VectorEngine(store_type="memory", embedding_fn=embedding_fn)
132
+
133
+ # 索引文档
134
+ # Index documents
135
+ engine.index_documents([
136
+ Document(content="Python是一种编程语言,由Guido van Rossum创建"),
137
+ Document(content="机器学习是人工智能的子领域"),
138
+ Document(content="深度学习使用多层神经网络"),
139
+ ])
140
+
141
+ # 语义搜索
142
+ # Semantic search
143
+ results = engine.search("编程语言")
144
+ # 返回:Python是一种编程语言...(语义匹配,非关键词匹配)
145
+ # Returns: Python是一种编程语言... (semantic match, not keyword match)
146
+ ```
147
+
148
+ ### LLM 对话(需配置 API)| LLM Chat (requires API configuration)
149
+
150
+ ```python
151
+ from ai_sdk import LLMEngine, LLMConfig, ModelProvider
152
+
153
+ config = LLMConfig(
154
+ provider=ModelProvider.DEEPSEEK,
155
+ base_url="https://api.deepseek.com/anthropic",
156
+ api_key="sk-xxx", # 或设置环境变量 AI_SDK_API_KEY | or set the AI_SDK_API_KEY environment variable
157
+ model="deepseek-v4-flash",
158
+ )
159
+
160
+ llm = LLMEngine(config)
161
+
162
+ # 单次生成
163
+ # Single generation
164
+ result = llm.generate("你好")
165
+
166
+ # 流式输出
167
+ # Streaming output
168
+ for chunk in llm.stream("讲个故事"):
169
+ print(chunk, end="", flush=True)
170
+
171
+ # 多轮对话
172
+ # Multi-turn conversation
173
+ messages = [
174
+ {"role": "user", "content": "我叫小明"},
175
+ {"role": "assistant", "content": "你好小明!"},
176
+ {"role": "user", "content": "我叫什么名字?"},
177
+ ]
178
+ result = llm.chat(messages)
179
+ ```
180
+
181
+ ### 结构化数据提取 | Structured Data Extraction
182
+
183
+ ```python
184
+ from ai_sdk import StructuredOutput
185
+
186
+ schema = {"name": "string", "amount": "number"}
187
+
188
+ # 正则模式(完全离线,简单模式)
189
+ # Regex mode (fully offline, simple patterns)
190
+ output = StructuredOutput(schema)
191
+ result = output.extract("张三报销500元")
192
+ # {'name': '张三', 'amount': 500.0}
193
+
194
+ # LLM模式(需配置API,语义理解)
195
+ # LLM mode (requires API, semantic understanding)
196
+ from ai_sdk import LLMEngine, LLMConfig, ModelProvider
197
+ llm = LLMEngine(LLMConfig(
198
+ provider=ModelProvider.DEEPSEEK,
199
+ base_url="https://api.deepseek.com/anthropic",
200
+ api_key="sk-xxx",
201
+ ))
202
+ output = StructuredOutput(schema, llm=llm)
203
+ result = output.extract("李四消费了三百块")
204
+ # {'name': '李四', 'amount': 300.0}(语义理解中文数字)
205
+ # {'name': '李四', 'amount': 300.0} (semantic understanding of Chinese numbers)
206
+ ```
207
+
208
+ ## 功能模块 | Feature Modules
209
+
210
+ | 模块 | 离线可用 | 依赖 |
211
+ |------|---------|------|
212
+ | Workflow | ✅ | 无 |
213
+ | Dashboard | ✅ | 无 |
214
+ | DocGen (Markdown/HTML) | ✅ | 无 |
215
+ | DocParser (Markdown) | ✅ | 无 |
216
+ | SessionMemory | ✅ | 无 |
217
+ | VectorEngine (存储) | ✅ | 无 |
218
+ | VectorEngine (语义搜索) | ✅ | sentence-transformers |
219
+ | StructuredOutput (正则) | ✅ | 无 |
220
+ | StructuredOutput (语义) | ❌ | LLM API |
221
+ | LLMEngine | ❌ | LLM API 或 Ollama |
222
+ | KnowledgeQA | ✅ | 向量搜索离线,生成需LLM |
223
+ | LocalModelManager | ✅ | 模型文件本地存储 |
224
+
225
+ | Module | Offline | Dependencies |
226
+ |--------|---------|--------------|
227
+ | Workflow | ✅ | None |
228
+ | Dashboard | ✅ | None |
229
+ | DocGen (Markdown/HTML) | ✅ | None |
230
+ | DocParser (Markdown) | ✅ | None |
231
+ | SessionMemory | ✅ | None |
232
+ | VectorEngine (Storage) | ✅ | None |
233
+ | VectorEngine (Semantic Search) | ✅ | sentence-transformers |
234
+ | StructuredOutput (Regex) | ✅ | None |
235
+ | StructuredOutput (Semantic) | ❌ | LLM API |
236
+ | LLMEngine | ❌ | LLM API or Ollama |
237
+ | KnowledgeQA | ✅ | Vector search offline, generation needs LLM |
238
+ | LocalModelManager | ✅ | Local model file storage |
239
+
240
+ ## 本地模型 | Local Models
241
+
242
+ ### 嵌入模型 | Embedding Models
243
+
244
+ | 模型 | 体积 | 语言 | 离线 |
245
+ |------|------|------|------|
246
+ | bge-small-zh-v1.5 | 100MB | 中文 | ✅ |
247
+ | bge-large-zh-v1.5 | 650MB | 中文 | ✅ |
248
+ | all-MiniLM-L6-v2 | 80MB | 英文 | ✅ |
249
+
250
+ | Model | Size | Language | Offline |
251
+ |-------|------|----------|---------|
252
+ | bge-small-zh-v1.5 | 100MB | Chinese | ✅ |
253
+ | bge-large-zh-v1.5 | 650MB | Chinese | ✅ |
254
+ | all-MiniLM-L6-v2 | 80MB | English | ✅ |
255
+
256
+ ### 语音模型 | Speech Models
257
+
258
+ | 模型 | 体积 | 最低配置 | 离线 |
259
+ |------|------|---------|------|
260
+ | whisper-tiny | 75MB | 1GB RAM | ✅ |
261
+ | whisper-small | 466MB | 2GB RAM | ✅ |
262
+ | whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
263
+
264
+ | Model | Size | Min Requirements | Offline |
265
+ |-------|------|------------------|---------|
266
+ | whisper-tiny | 75MB | 1GB RAM | ✅ |
267
+ | whisper-small | 466MB | 2GB RAM | ✅ |
268
+ | whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
269
+
270
+ ```python
271
+ from ai_sdk import LocalModelManager
272
+
273
+ manager = LocalModelManager()
274
+ manager.download("bge-small-zh-v1.5") # 首次下载 | First download
275
+ model = manager.load("bge-small-zh-v1.5") # 之后离线加载 | Then load offline
276
+ ```
277
+
278
+ ## 测试 | Testing
279
+
280
+ ```bash
281
+ pip install vertai[dev]
282
+ python -m pytest tests/ -v --cov=ai_sdk
283
+
284
+ # 642 passed, 20 skipped, 94% coverage
285
+ ```
286
+
287
+ ## 许可证 | License
288
+
289
+ MIT
vertai-0.1.0/README.md ADDED
@@ -0,0 +1,253 @@
1
+ # VertAI
2
+
3
+ 垂直领域 AI 智能体开发 SDK,支持完全离线运行。
4
+
5
+ A vertical-domain AI agent development SDK designed for fully offline operation.
6
+
7
+ ## 设计理念 | Design Philosophy
8
+
9
+ **模块化架构**:核心功能轻量安装,按需扩展语义能力。
10
+
11
+ **Modular Architecture**: Lightweight core installation with optional semantic capabilities.
12
+
13
+ ```
14
+ vertai (核心 ~5MB)
15
+ ├── Workflow # 工作流编排
16
+ ├── Dashboard # 数据可视化
17
+ ├── DocGen # 文档生成
18
+ ├── DocParser # 文档解析 (Markdown)
19
+ ├── SessionMemory # 会话管理
20
+ └── VectorEngine # 向量存储 (需嵌入模型提供语义能力)
21
+
22
+ 可选扩展
23
+ ├── [embeddings] # 离线语义搜索
24
+ ├── [doc-parser] # 文档解析 (PDF/Word/Excel/PPT)
25
+ └── [production] # 生产环境完整配置
26
+ ```
27
+
28
+ ## 安装 | Installation
29
+
30
+ ### 核心安装 | Core Installation
31
+
32
+ ```bash
33
+ pip install vertai
34
+ ```
35
+
36
+ ### 扩展安装 | Optional Extensions
37
+
38
+ ```bash
39
+ # 离线语义搜索支持
40
+ # Offline semantic search support
41
+ pip install vertai[embeddings]
42
+
43
+ # 文档解析支持 (PDF/Word/Excel/PPT)
44
+ # Document parsing support (PDF/Word/Excel/PPT)
45
+ pip install vertai[doc-parser]
46
+
47
+ # 完整生产配置
48
+ # Complete production configuration
49
+ pip install vertai[production]
50
+ ```
51
+
52
+ | 安装选项 | 体积 | 功能 |
53
+ |---------|------|------|
54
+ | 核心 | ~5MB | Workflow, Dashboard, DocGen, Markdown解析 |
55
+ | [embeddings] | ~500MB | 离线语义向量搜索 |
56
+ | [doc-parser] | ~50MB | PDF/Word/Excel/PPT解析 |
57
+ | [production] | ~600MB | 完整生产配置 |
58
+
59
+ | Installation Option | Size | Features |
60
+ |---------------------|------|----------|
61
+ | Core | ~5MB | Workflow, Dashboard, DocGen, Markdown parsing |
62
+ | [embeddings] | ~500MB | Offline semantic vector search |
63
+ | [doc-parser] | ~50MB | PDF/Word/Excel/PPT parsing |
64
+ | [production] | ~600MB | Complete production configuration |
65
+
66
+ ## 快速开始 | Quick Start
67
+
68
+ ### 工作流编排(完全离线)| Workflow Orchestration (Fully Offline)
69
+
70
+ ```python
71
+ from ai_sdk import Workflow
72
+
73
+ wf = Workflow()
74
+ wf.step("load", lambda ctx: ctx.set("data", [1, 2, 3, 4, 5]))
75
+ wf.step("process", lambda ctx: ctx.set("sum", sum(ctx.get("data"))))
76
+ wf.step("output", lambda ctx: print(f"总和: {ctx.get('sum')}"))
77
+ wf.run()
78
+ ```
79
+
80
+ ### 语义向量搜索(需安装 embeddings)| Semantic Vector Search (requires embeddings)
81
+
82
+ ```python
83
+ from ai_sdk import VectorEngine, Document
84
+ from sentence_transformers import SentenceTransformer
85
+
86
+ # 加载嵌入模型(首次下载约100MB,之后离线可用)
87
+ # Load embedding model (~100MB first download, then works offline)
88
+ model = SentenceTransformer('BAAI/bge-small-zh-v1.5')
89
+
90
+ def embedding_fn(text):
91
+ return model.encode(text).tolist()
92
+
93
+ # 创建向量引擎
94
+ # Create vector engine
95
+ engine = VectorEngine(store_type="memory", embedding_fn=embedding_fn)
96
+
97
+ # 索引文档
98
+ # Index documents
99
+ engine.index_documents([
100
+ Document(content="Python是一种编程语言,由Guido van Rossum创建"),
101
+ Document(content="机器学习是人工智能的子领域"),
102
+ Document(content="深度学习使用多层神经网络"),
103
+ ])
104
+
105
+ # 语义搜索
106
+ # Semantic search
107
+ results = engine.search("编程语言")
108
+ # 返回:Python是一种编程语言...(语义匹配,非关键词匹配)
109
+ # Returns: Python是一种编程语言... (semantic match, not keyword match)
110
+ ```
111
+
112
+ ### LLM 对话(需配置 API)| LLM Chat (requires API configuration)
113
+
114
+ ```python
115
+ from ai_sdk import LLMEngine, LLMConfig, ModelProvider
116
+
117
+ config = LLMConfig(
118
+ provider=ModelProvider.DEEPSEEK,
119
+ base_url="https://api.deepseek.com/anthropic",
120
+ api_key="sk-xxx", # 或设置环境变量 AI_SDK_API_KEY | or set the AI_SDK_API_KEY environment variable
121
+ model="deepseek-v4-flash",
122
+ )
123
+
124
+ llm = LLMEngine(config)
125
+
126
+ # 单次生成
127
+ # Single generation
128
+ result = llm.generate("你好")
129
+
130
+ # 流式输出
131
+ # Streaming output
132
+ for chunk in llm.stream("讲个故事"):
133
+ print(chunk, end="", flush=True)
134
+
135
+ # 多轮对话
136
+ # Multi-turn conversation
137
+ messages = [
138
+ {"role": "user", "content": "我叫小明"},
139
+ {"role": "assistant", "content": "你好小明!"},
140
+ {"role": "user", "content": "我叫什么名字?"},
141
+ ]
142
+ result = llm.chat(messages)
143
+ ```
144
+
145
+ ### 结构化数据提取 | Structured Data Extraction
146
+
147
+ ```python
148
+ from ai_sdk import StructuredOutput
149
+
150
+ schema = {"name": "string", "amount": "number"}
151
+
152
+ # 正则模式(完全离线,简单模式)
153
+ # Regex mode (fully offline, simple patterns)
154
+ output = StructuredOutput(schema)
155
+ result = output.extract("张三报销500元")
156
+ # {'name': '张三', 'amount': 500.0}
157
+
158
+ # LLM模式(需配置API,语义理解)
159
+ # LLM mode (requires API, semantic understanding)
160
+ from ai_sdk import LLMEngine, LLMConfig, ModelProvider
161
+ llm = LLMEngine(LLMConfig(
162
+ provider=ModelProvider.DEEPSEEK,
163
+ base_url="https://api.deepseek.com/anthropic",
164
+ api_key="sk-xxx",
165
+ ))
166
+ output = StructuredOutput(schema, llm=llm)
167
+ result = output.extract("李四消费了三百块")
168
+ # {'name': '李四', 'amount': 300.0}(语义理解中文数字)
169
+ # {'name': '李四', 'amount': 300.0} (semantic understanding of Chinese numbers)
170
+ ```
171
+
172
+ ## 功能模块 | Feature Modules
173
+
174
+ | 模块 | 离线可用 | 依赖 |
175
+ |------|---------|------|
176
+ | Workflow | ✅ | 无 |
177
+ | Dashboard | ✅ | 无 |
178
+ | DocGen (Markdown/HTML) | ✅ | 无 |
179
+ | DocParser (Markdown) | ✅ | 无 |
180
+ | SessionMemory | ✅ | 无 |
181
+ | VectorEngine (存储) | ✅ | 无 |
182
+ | VectorEngine (语义搜索) | ✅ | sentence-transformers |
183
+ | StructuredOutput (正则) | ✅ | 无 |
184
+ | StructuredOutput (语义) | ❌ | LLM API |
185
+ | LLMEngine | ❌ | LLM API 或 Ollama |
186
+ | KnowledgeQA | ✅ | 向量搜索离线,生成需LLM |
187
+ | LocalModelManager | ✅ | 模型文件本地存储 |
188
+
189
+ | Module | Offline | Dependencies |
190
+ |--------|---------|--------------|
191
+ | Workflow | ✅ | None |
192
+ | Dashboard | ✅ | None |
193
+ | DocGen (Markdown/HTML) | ✅ | None |
194
+ | DocParser (Markdown) | ✅ | None |
195
+ | SessionMemory | ✅ | None |
196
+ | VectorEngine (Storage) | ✅ | None |
197
+ | VectorEngine (Semantic Search) | ✅ | sentence-transformers |
198
+ | StructuredOutput (Regex) | ✅ | None |
199
+ | StructuredOutput (Semantic) | ❌ | LLM API |
200
+ | LLMEngine | ❌ | LLM API or Ollama |
201
+ | KnowledgeQA | ✅ | Vector search offline, generation needs LLM |
202
+ | LocalModelManager | ✅ | Local model file storage |
203
+
204
+ ## 本地模型 | Local Models
205
+
206
+ ### 嵌入模型 | Embedding Models
207
+
208
+ | 模型 | 体积 | 语言 | 离线 |
209
+ |------|------|------|------|
210
+ | bge-small-zh-v1.5 | 100MB | 中文 | ✅ |
211
+ | bge-large-zh-v1.5 | 650MB | 中文 | ✅ |
212
+ | all-MiniLM-L6-v2 | 80MB | 英文 | ✅ |
213
+
214
+ | Model | Size | Language | Offline |
215
+ |-------|------|----------|---------|
216
+ | bge-small-zh-v1.5 | 100MB | Chinese | ✅ |
217
+ | bge-large-zh-v1.5 | 650MB | Chinese | ✅ |
218
+ | all-MiniLM-L6-v2 | 80MB | English | ✅ |
219
+
220
+ ### 语音模型 | Speech Models
221
+
222
+ | 模型 | 体积 | 最低配置 | 离线 |
223
+ |------|------|---------|------|
224
+ | whisper-tiny | 75MB | 1GB RAM | ✅ |
225
+ | whisper-small | 466MB | 2GB RAM | ✅ |
226
+ | whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
227
+
228
+ | Model | Size | Min Requirements | Offline |
229
+ |-------|------|------------------|---------|
230
+ | whisper-tiny | 75MB | 1GB RAM | ✅ |
231
+ | whisper-small | 466MB | 2GB RAM | ✅ |
232
+ | whisper-large-v3 | 2.9GB | 10GB RAM | ✅ |
233
+
234
+ ```python
235
+ from ai_sdk import LocalModelManager
236
+
237
+ manager = LocalModelManager()
238
+ manager.download("bge-small-zh-v1.5") # 首次下载 | First download
239
+ model = manager.load("bge-small-zh-v1.5") # 之后离线加载 | Then load offline
240
+ ```
241
+
242
+ ## 测试 | Testing
243
+
244
+ ```bash
245
+ pip install vertai[dev]
246
+ python -m pytest tests/ -v --cov=ai_sdk
247
+
248
+ # 642 passed, 20 skipped, 94% coverage
249
+ ```
250
+
251
+ ## 许可证 | License
252
+
253
+ MIT
vertai-0.1.0/ai_sdk/__init__.py ADDED
@@ -0,0 +1,120 @@
1
+ """VertAI - 垂直领域 AI 智能体开发 SDK"""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from ai_sdk.core.llm import (
6
+ LLMEngine,
7
+ LLMConfig,
8
+ ModelProvider,
9
+ ChatMessage,
10
+ GenerateResult,
11
+ )
12
+ from ai_sdk.core.memory import (
13
+ Message,
14
+ SessionConfig,
15
+ SessionMemory,
16
+ )
17
+ from ai_sdk.core.vector import (
18
+ Document,
19
+ VectorEngine,
20
+ VectorConfig,
21
+ SearchResult,
22
+ )
23
+ from ai_sdk.data.parser import DocParser
24
+ from ai_sdk.output.structured import StructuredOutput
25
+ from ai_sdk.output.docgen import DocGen
26
+ from ai_sdk.scenarios.reviewer import Reviewer, ReviewerConfig, ReviewResult
27
+ from ai_sdk.scenarios.knowledge_qa import (
28
+ KnowledgeQA,
29
+ KnowledgeQAConfig,
30
+ AnswerResult,
31
+ SourceReference,
32
+ )
33
+ from ai_sdk.viz.dashboard import (
34
+ Dashboard,
35
+ DashboardTheme,
36
+ Metric,
37
+ Chart,
38
+ ChartType,
39
+ ChartConfig,
40
+ )
41
+ from ai_sdk.workflow import (
42
+ Workflow,
43
+ WorkflowConfig,
44
+ WorkflowContext,
45
+ WorkflowResult,
46
+ StepResult,
47
+ StepStatus,
48
+ Step,
49
+ StepType,
50
+ ParallelConfig,
51
+ LoopConfig,
52
+ LoopType,
53
+ )
54
+ from ai_sdk.local import (
55
+ LocalModelManager,
56
+ LocalModelConfig,
57
+ ModelCategory,
58
+ ModelInfo,
59
+ WhisperModel,
60
+ EmbeddingModel,
61
+ )
62
+ from ai_sdk.local.models import check_hardware_requirements
63
+
64
+ __all__ = [
65
+ # LLM
66
+ "LLMEngine",
67
+ "LLMConfig",
68
+ "ModelProvider",
69
+ "ChatMessage",
70
+ "GenerateResult",
71
+ # Memory
72
+ "Message",
73
+ "SessionConfig",
74
+ "SessionMemory",
75
+ # Vector
76
+ "Document",
77
+ "VectorEngine",
78
+ "VectorConfig",
79
+ "SearchResult",
80
+ # Data
81
+ "DocParser",
82
+ # Output
83
+ "StructuredOutput",
84
+ "DocGen",
85
+ # Scenarios
86
+ "Reviewer",
87
+ "ReviewerConfig",
88
+ "ReviewResult",
89
+ "KnowledgeQA",
90
+ "KnowledgeQAConfig",
91
+ "AnswerResult",
92
+ "SourceReference",
93
+ # Viz
94
+ "Dashboard",
95
+ "DashboardTheme",
96
+ "Metric",
97
+ "Chart",
98
+ "ChartType",
99
+ "ChartConfig",
100
+ # Workflow
101
+ "Workflow",
102
+ "WorkflowConfig",
103
+ "WorkflowContext",
104
+ "WorkflowResult",
105
+ "StepResult",
106
+ "StepStatus",
107
+ "Step",
108
+ "StepType",
109
+ "ParallelConfig",
110
+ "LoopConfig",
111
+ "LoopType",
112
+ # Local Models
113
+ "LocalModelManager",
114
+ "LocalModelConfig",
115
+ "ModelCategory",
116
+ "ModelInfo",
117
+ "WhisperModel",
118
+ "EmbeddingModel",
119
+ "check_hardware_requirements",
120
+ ]
vertai-0.1.0/ai_sdk/core/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ """核心模块 - LLM引擎、向量引擎、记忆引擎、工具引擎"""
2
+
3
+ from ai_sdk.core.llm import LLMEngine, LLMConfig
4
+ from ai_sdk.core.memory import (
5
+ Message,
6
+ SessionConfig,
7
+ SessionMemory,
8
+ )
9
+ from ai_sdk.core.vector import (
10
+ Document,
11
+ SearchResult,
12
+ VectorConfig,
13
+ VectorEngine,
14
+ VectorStore,
15
+ InMemoryVectorStore,
16
+ ChromaVectorStore,
17
+ FAISSVectorStore,
18
+ EmbeddingEngine,
19
+ )
20
+
21
+ __all__ = [
22
+ "LLMEngine",
23
+ "LLMConfig",
24
+ "Message",
25
+ "SessionConfig",
26
+ "SessionMemory",
27
+ "Document",
28
+ "SearchResult",
29
+ "VectorConfig",
30
+ "VectorEngine",
31
+ "VectorStore",
32
+ "InMemoryVectorStore",
33
+ "ChromaVectorStore",
34
+ "FAISSVectorStore",
35
+ "EmbeddingEngine",
36
+ ]