xparse-client 0.2.19__py3-none-any.whl → 0.3.0b8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- example/1_basic_api_usage.py +198 -0
- example/2_async_job.py +210 -0
- example/3_local_workflow.py +300 -0
- example/4_advanced_workflow.py +327 -0
- example/README.md +128 -0
- example/config_example.json +95 -0
- tests/conftest.py +310 -0
- tests/unit/__init__.py +1 -0
- tests/unit/api/__init__.py +1 -0
- tests/unit/api/test_extract.py +232 -0
- tests/unit/api/test_local.py +231 -0
- tests/unit/api/test_parse.py +374 -0
- tests/unit/api/test_pipeline.py +369 -0
- tests/unit/api/test_workflows.py +108 -0
- tests/unit/connectors/test_ftp.py +525 -0
- tests/unit/connectors/test_local_connectors.py +324 -0
- tests/unit/connectors/test_milvus.py +368 -0
- tests/unit/connectors/test_qdrant.py +399 -0
- tests/unit/connectors/test_s3.py +598 -0
- tests/unit/connectors/test_smb.py +442 -0
- tests/unit/connectors/test_utils.py +335 -0
- tests/unit/models/test_local.py +54 -0
- tests/unit/models/test_pipeline_stages.py +144 -0
- tests/unit/models/test_workflows.py +55 -0
- tests/unit/test_base.py +437 -0
- tests/unit/test_client.py +110 -0
- tests/unit/test_config.py +160 -0
- tests/unit/test_exceptions.py +182 -0
- tests/unit/test_http.py +562 -0
- xparse_client/__init__.py +111 -20
- xparse_client/_base.py +188 -0
- xparse_client/_client.py +218 -0
- xparse_client/_config.py +221 -0
- xparse_client/_http.py +351 -0
- xparse_client/api/__init__.py +14 -0
- xparse_client/api/extract.py +109 -0
- xparse_client/api/local.py +225 -0
- xparse_client/api/parse.py +209 -0
- xparse_client/api/pipeline.py +134 -0
- xparse_client/api/workflows.py +204 -0
- xparse_client/connectors/__init__.py +45 -0
- xparse_client/connectors/_utils.py +138 -0
- xparse_client/connectors/destinations/__init__.py +45 -0
- xparse_client/connectors/destinations/base.py +116 -0
- xparse_client/connectors/destinations/local.py +91 -0
- xparse_client/connectors/destinations/milvus.py +229 -0
- xparse_client/connectors/destinations/qdrant.py +238 -0
- xparse_client/connectors/destinations/s3.py +163 -0
- xparse_client/connectors/sources/__init__.py +45 -0
- xparse_client/connectors/sources/base.py +74 -0
- xparse_client/connectors/sources/ftp.py +278 -0
- xparse_client/connectors/sources/local.py +176 -0
- xparse_client/connectors/sources/s3.py +232 -0
- xparse_client/connectors/sources/smb.py +259 -0
- xparse_client/exceptions.py +398 -0
- xparse_client/models/__init__.py +60 -0
- xparse_client/models/chunk.py +39 -0
- xparse_client/models/embed.py +62 -0
- xparse_client/models/extract.py +41 -0
- xparse_client/models/local.py +38 -0
- xparse_client/models/parse.py +132 -0
- xparse_client/models/pipeline.py +134 -0
- xparse_client/models/workflows.py +74 -0
- xparse_client-0.3.0b8.dist-info/METADATA +1075 -0
- xparse_client-0.3.0b8.dist-info/RECORD +68 -0
- {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/WHEEL +1 -1
- {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/licenses/LICENSE +1 -1
- {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/top_level.txt +2 -0
- xparse_client/pipeline/__init__.py +0 -3
- xparse_client/pipeline/config.py +0 -129
- xparse_client/pipeline/destinations.py +0 -489
- xparse_client/pipeline/pipeline.py +0 -690
- xparse_client/pipeline/sources.py +0 -583
- xparse_client-0.2.19.dist-info/METADATA +0 -1050
- xparse_client-0.2.19.dist-info/RECORD +0 -11
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""LocalSource 和 LocalDestination 集成测试
|
|
2
|
+
|
|
3
|
+
测试本地文件系统 Source 和 Destination,包括:
|
|
4
|
+
- LocalSource: 文件列表、文件读取、pattern 匹配、递归搜索
|
|
5
|
+
- LocalDestination: 文件写入、元数据处理、中间结果
|
|
6
|
+
- 错误处理:目录不存在、文件不存在、权限错误
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
13
|
+
from xparse_client.connectors import LocalDestination, LocalSource
|
|
14
|
+
from xparse_client.exceptions import SourceError
|
|
15
|
+
|
|
16
|
+
# ============================================================================
|
|
17
|
+
# LocalSource 测试
|
|
18
|
+
# ============================================================================
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_local_source_initialization(tmp_path):
    """A freshly built LocalSource exposes its directory and default options."""
    local_source = LocalSource(directory=str(tmp_path))

    assert local_source.directory == tmp_path
    # A pattern of None means "match every file".
    assert local_source.pattern is None
    assert local_source.recursive is False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_local_source_directory_not_exist():
    """Constructing a LocalSource on a missing directory raises SourceError."""
    missing_dir = "/nonexistent/directory"

    with pytest.raises(SourceError) as exc_info:
        LocalSource(directory=missing_dir)

    raised = exc_info.value
    assert "目录不存在" in str(raised)
    assert raised.connector_type == "local"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_local_source_not_a_directory(tmp_path):
    """Pointing LocalSource at a regular file raises SourceError."""
    # Create a plain file to pass in place of a directory.
    regular_file = tmp_path / "file.txt"
    regular_file.write_text("test")

    with pytest.raises(SourceError) as exc_info:
        LocalSource(directory=str(regular_file))

    message = str(exc_info.value)
    assert "不是目录" in message
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_local_source_list_files_empty(tmp_path):
    """Listing an empty directory yields an empty list."""
    listing = LocalSource(directory=str(tmp_path)).list_files()

    assert listing == []
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_local_source_list_files_basic(tmp_path):
    """Non-recursive listing returns only the top-level files."""
    # Top-level fixture files.
    top_level = {
        "file1.pdf": "test1",
        "file2.docx": "test2",
        "file3.txt": "test3",
    }
    for name, body in top_level.items():
        (tmp_path / name).write_text(body)

    # A nested file that must not show up in a non-recursive listing.
    nested_dir = tmp_path / "subdir"
    nested_dir.mkdir()
    (nested_dir / "file4.pdf").write_text("test4")

    listing = LocalSource(directory=str(tmp_path)).list_files()

    # Only the three top-level files are expected.
    assert len(listing) == 3
    for name in top_level:
        assert name in listing
    assert "subdir/file4.pdf" not in listing
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_local_source_list_files_with_pattern(tmp_path):
    """Only files matching one of the given patterns are listed."""
    fixtures = (
        ("file1.pdf", "test1"),
        ("file2.docx", "test2"),
        ("file3.txt", "test3"),
    )
    for name, body in fixtures:
        (tmp_path / name).write_text(body)

    filtered_source = LocalSource(
        directory=str(tmp_path),
        pattern=["*.pdf", "*.docx"],
    )
    listing = filtered_source.list_files()

    assert len(listing) == 2
    assert "file1.pdf" in listing
    assert "file2.docx" in listing
    assert "file3.txt" not in listing
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_local_source_list_files_recursive(tmp_path):
    """Recursive listing includes files from nested directories."""
    (tmp_path / "file1.pdf").write_text("test1")

    # Two levels of nesting, one file per level.
    level_one = tmp_path / "subdir"
    level_one.mkdir()
    (level_one / "file2.pdf").write_text("test2")

    level_two = level_one / "subdir2"
    level_two.mkdir()
    (level_two / "file3.pdf").write_text("test3")

    listing = LocalSource(directory=str(tmp_path), recursive=True).list_files()

    assert len(listing) == 3
    expected_paths = (
        "file1.pdf",
        "subdir/file2.pdf",
        "subdir/subdir2/file3.pdf",
    )
    for relative_path in expected_paths:
        assert relative_path in listing
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_local_source_read_file(tmp_path):
    """read_file returns the raw bytes plus a data-source description."""
    payload = b"PDF content"
    (tmp_path / "test.pdf").write_bytes(payload)

    reader = LocalSource(directory=str(tmp_path))
    file_bytes, data_source = reader.read_file("test.pdf")

    assert file_bytes == payload
    for key in ("url", "version", "date_created", "date_modified"):
        assert key in data_source
    assert data_source["record_locator"]["protocol"] == "file"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_local_source_read_file_not_found(tmp_path):
    """Reading a file that does not exist raises SourceError."""
    reader = LocalSource(directory=str(tmp_path))

    with pytest.raises(SourceError) as exc_info:
        reader.read_file("nonexistent.pdf")

    raised = exc_info.value
    assert "不存在" in str(raised)
    assert raised.connector_type == "local"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def test_local_source_read_file_path_traversal(tmp_path):
    """A path-traversal style file path is rejected with SourceError."""
    reader = LocalSource(directory=str(tmp_path))
    escaping_path = "../../../etc/passwd"

    with pytest.raises(SourceError) as exc_info:
        reader.read_file(escaping_path)

    assert "非法路径" in str(exc_info.value)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_local_source_context_manager(tmp_path):
    """LocalSource can be used in a ``with`` statement."""
    source_dir = str(tmp_path)

    with LocalSource(directory=source_dir) as managed_source:
        assert managed_source is not None
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ============================================================================
|
|
167
|
+
# LocalDestination 测试
|
|
168
|
+
# ============================================================================
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_local_destination_initialization(tmp_path):
    """LocalDestination records its output directory and creates it."""
    target_dir = tmp_path / "output"
    destination = LocalDestination(output_dir=str(target_dir))

    assert destination.output_dir == target_dir
    assert target_dir.exists()
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_local_destination_create_nested_directory(tmp_path):
    """A nested output directory exists after construction."""
    nested_dir = tmp_path.joinpath("level1", "level2", "output")
    destination = LocalDestination(output_dir=str(nested_dir))

    assert destination.output_dir.exists()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def test_local_destination_write_basic(tmp_path):
    """write() stores the elements as JSON and reports success."""
    destination = LocalDestination(output_dir=str(tmp_path))

    elements = [
        {"element_id": "1", "text": "test1"},
        {"element_id": "2", "text": "test2"},
    ]
    write_metadata = {"filename": "test.pdf"}

    succeeded = destination.write(elements, write_metadata)
    assert succeeded is True

    # The output is expected at test.json for an input named test.pdf.
    written_path = tmp_path / "test.json"
    assert written_path.exists()

    # Round-trip the file to confirm the payload was stored unchanged.
    round_tripped = json.loads(written_path.read_text(encoding="utf-8"))
    assert round_tripped == elements
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def test_local_destination_write_with_stage(tmp_path):
    """Intermediate results carry the stage name in the output file name."""
    destination = LocalDestination(output_dir=str(tmp_path))

    parsed_elements = [{"element_id": "1", "text": "parsed text"}]
    write_metadata = {"filename": "test.pdf", "stage": "parse"}

    succeeded = destination.write(parsed_elements, write_metadata)
    assert succeeded is True

    # The file name is expected to include the stage.
    staged_path = tmp_path / "test_parse.json"
    assert staged_path.exists()
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def test_local_destination_write_without_filename(tmp_path):
    """Without a filename in the metadata the default output name is used."""
    destination = LocalDestination(output_dir=str(tmp_path))

    elements = [{"element_id": "1"}]
    empty_metadata = {}

    succeeded = destination.write(elements, empty_metadata)
    assert succeeded is True

    # The default file name is expected.
    default_path = tmp_path / "output.json"
    assert default_path.exists()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def test_local_destination_context_manager(tmp_path):
    """LocalDestination can be used in a ``with`` statement."""
    target_dir = str(tmp_path)

    with LocalDestination(output_dir=target_dir) as managed_destination:
        assert managed_destination is not None
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# ============================================================================
|
|
251
|
+
# 集成测试
|
|
252
|
+
# ============================================================================
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def test_local_source_and_destination_integration(tmp_path):
    """Files read through LocalSource can be written out via LocalDestination."""
    # Input directory holding two PDFs.
    inbox = tmp_path / "input"
    inbox.mkdir()
    for name, body in (("file1.pdf", "test1"), ("file2.pdf", "test2")):
        (inbox / name).write_text(body)

    # Output directory; it is not created here.
    outbox = tmp_path / "output"

    reader = LocalSource(directory=str(inbox), pattern=["*.pdf"])
    writer = LocalDestination(output_dir=str(outbox))

    discovered = reader.list_files()
    assert len(discovered) == 2

    for relative_path in discovered:
        raw_bytes, data_source = reader.read_file(relative_path)

        # Stand-in for a real processing result.
        processed = [
            {
                "element_id": "1",
                "text": raw_bytes.decode("utf-8"),
                "data_source": data_source,
            }
        ]

        writer.write(processed, {"filename": relative_path})

    # One JSON file is expected per input file.
    json_outputs = list(outbox.glob("*.json"))
    assert len(json_outputs) == 2
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_local_source_read_file_permission_error(tmp_path):
    """read_file raises SourceError when open() fails with PermissionError."""
    from unittest.mock import patch

    from xparse_client.exceptions import SourceError

    # Create the file that will be read.
    (tmp_path / "test.txt").write_text("test content")

    reader = LocalSource(directory=str(tmp_path))

    # Make every open() call raise PermissionError.
    denied = PermissionError("Permission denied")
    with patch('builtins.open', side_effect=denied), pytest.raises(SourceError) as exc_info:
        reader.read_file("test.txt")

    message = str(exc_info.value)
    assert "无权限读取文件" in message or "Permission" in message
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def test_local_source_read_file_general_error(tmp_path):
    """read_file raises SourceError when open() fails with a generic Exception.

    The target file is created first so that the failure can only come from
    the patched ``open()``. Without it, ``read_file`` may raise SourceError for
    the missing file before ``open()`` is reached, and the generic-error path
    would go untested.
    """
    # Imported locally because this module has no top-level unittest.mock import.
    from unittest.mock import patch

    # The file must exist so the not-found check cannot satisfy pytest.raises.
    (tmp_path / "test.txt").write_text("test content")

    source = LocalSource(directory=str(tmp_path))

    # Make every open() call raise a generic Exception.
    with patch('builtins.open', side_effect=Exception("Unknown error")):
        with pytest.raises(SourceError) as exc_info:
            source.read_file("test.txt")

    message = str(exc_info.value)
    assert "读取文件失败" in message or "Unknown" in message
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"""MilvusDestination 测试
|
|
2
|
+
|
|
3
|
+
测试 Milvus 向量数据库 Destination。使用 mock 模式测试,无需真实 Milvus 实例。
|
|
4
|
+
|
|
5
|
+
运行方式:
|
|
6
|
+
pytest tests/unit/connectors/test_milvus.py -v
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from unittest.mock import MagicMock, patch
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
13
|
+
from xparse_client.connectors import MilvusDestination
|
|
14
|
+
from xparse_client.exceptions import DestinationError
|
|
15
|
+
|
|
16
|
+
# ============================================================================
|
|
17
|
+
# Mock 辅助函数
|
|
18
|
+
# ============================================================================
|
|
19
|
+
|
|
20
|
+
def _setup_milvus_mock(mock_get_pymilvus):
|
|
21
|
+
"""设置 Milvus mock 的辅助函数"""
|
|
22
|
+
mock_milvus_client_class = MagicMock()
|
|
23
|
+
mock_data_type = MagicMock()
|
|
24
|
+
mock_client_instance = MagicMock()
|
|
25
|
+
|
|
26
|
+
# MilvusClient() 返回 mock 实例
|
|
27
|
+
mock_milvus_client_class.return_value = mock_client_instance
|
|
28
|
+
|
|
29
|
+
# _get_pymilvus() 返回 (MilvusClient, DataType)
|
|
30
|
+
mock_get_pymilvus.return_value = (mock_milvus_client_class, mock_data_type)
|
|
31
|
+
|
|
32
|
+
return mock_client_instance, mock_milvus_client_class, mock_data_type
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ============================================================================
|
|
36
|
+
# MilvusDestination 测试
|
|
37
|
+
# ============================================================================
|
|
38
|
+
|
|
39
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_initialization(mock_get_pymilvus):
    """The constructor stores db_path, collection_name and dimension."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Mock list_collections to report no collections.
    milvus_client.list_collections.return_value = []

    init_kwargs = {
        "db_path": "milvus.db",
        "collection_name": "test_collection",
        "dimension": 1024,
    }
    destination = MilvusDestination(**init_kwargs)

    for attribute, expected in init_kwargs.items():
        assert getattr(destination, attribute) == expected

    # Original author's note: the collection should be created when it does
    # not exist; list_collections is mocked to return an empty list, so the
    # creation logic would need to be checked; the real implementation creates
    # the collection when it is missing.
    # NOTE(review): nothing here asserts on create_collection - confirm
    # whether a creation assertion should be added.
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_with_api_key(mock_get_pymilvus):
    """The api_key is passed to the client constructor as ``token``."""
    milvus_client, client_class, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = []

    MilvusDestination(
        db_path="https://zilliz-cloud.com",
        collection_name="test_collection",
        dimension=1024,
        api_key="test-api-key",
    )

    # call_args unpacks to (args, kwargs) of the most recent constructor call.
    _, constructor_kwargs = client_class.call_args
    assert constructor_kwargs["token"] == "test-api-key"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_collection_exists(mock_get_pymilvus):
    """No collection is created when the collection already exists."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Report the target collection as already present.
    milvus_client.list_collections.return_value = ["test_collection"]

    MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # A new collection must not be created.
    milvus_client.create_collection.assert_not_called()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_with_embeddings(mock_get_pymilvus):
    """Elements carrying embeddings are inserted into the collection."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    element = {
        "element_id": "elem_001",
        "text": "Test text",
        "embeddings": [0.1] * 1024,
        "metadata": {"page": 1},
    }
    write_metadata = {"filename": "test.pdf", "record_id": "rec_123"}

    succeeded = destination.write([element], write_metadata)
    assert succeeded is True

    # insert must have been called exactly once.
    milvus_client.insert.assert_called_once()
    insert_kwargs = milvus_client.insert.call_args[1]

    # The insert targets the configured collection.
    assert insert_kwargs["collection_name"] == "test_collection"

    # The inserted row carries the required fields.
    rows = insert_kwargs["data"]
    assert len(rows) == 1
    first_row = rows[0]
    assert first_row["element_id"] == "elem_001"
    assert first_row["record_id"] == "rec_123"
    assert len(first_row["embeddings"]) == 1024
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_missing_embeddings(mock_get_pymilvus):
    """write() returns False when no element carries embeddings."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # An element with no "embeddings" key.
    element_without_vector = {
        "element_id": "elem_001",
        "text": "Test text",
    }
    write_metadata = {"filename": "test.pdf", "record_id": "rec_123"}

    # False is expected because there is no valid data to write.
    outcome = destination.write([element_without_vector], write_metadata)
    assert outcome is False

    # insert must not be called.
    milvus_client.insert.assert_not_called()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_wrong_dimension(mock_get_pymilvus):
    """A vector of the wrong dimension is still passed on to insert."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # Embeddings with 512 entries although the destination expects 1024.
    mis_sized_element = {
        "element_id": "elem_001",
        "text": "Test text",
        "embeddings": [0.1] * 512,
    }
    write_metadata = {"filename": "test.pdf", "record_id": "rec_123"}

    # Per the original author's note, the implementation attempts the write
    # and leaves dimension checking to the database layer.
    outcome = destination.write([mis_sized_element], write_metadata)
    assert outcome is True

    # insert is expected to have been called.
    milvus_client.insert.assert_called()
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_batch_write(mock_get_pymilvus):
    """Ten elements are written with a single insert call."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # Build ten elements.
    batch = []
    for index in range(10):
        batch.append(
            {
                "element_id": f"elem_{index:03d}",
                "text": f"Text {index}",
                "embeddings": [0.1 * index] * 1024,
            }
        )
    write_metadata = {"filename": "test.pdf", "record_id": "rec_123"}

    succeeded = destination.write(batch, write_metadata)
    assert succeeded is True

    # insert must have been called exactly once, with all ten rows.
    milvus_client.insert.assert_called_once()
    _, insert_kwargs = milvus_client.insert.call_args
    assert len(insert_kwargs["data"]) == 10
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_connection_error(mock_get_pymilvus):
    """A failure while creating the client raises DestinationError."""
    _, client_class, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Make creating the client fail.
    client_class.side_effect = Exception("Connection failed")

    with pytest.raises(DestinationError) as exc_info:
        MilvusDestination(
            db_path="invalid-path",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert any(fragment in message for fragment in ("初始化", "连接"))
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_error(mock_get_pymilvus):
    """A failing insert raises DestinationError from write()."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = ["test_collection"]

    # Make insert fail.
    milvus_client.insert.side_effect = Exception("Write failed")

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    element = {
        "element_id": "elem_001",
        "text": "Test",
        "embeddings": [0.1] * 1024,
    }
    write_metadata = {"filename": "test.pdf", "record_id": "rec_123"}

    with pytest.raises(DestinationError) as exc_info:
        destination.write([element], write_metadata)

    message = str(exc_info.value)
    assert any(fragment in message for fragment in ("写入", "Write"))
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_context_manager(mock_get_pymilvus):
    """MilvusDestination can be used in a ``with`` statement."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = []

    init_kwargs = {
        "db_path": "milvus.db",
        "collection_name": "test_collection",
        "dimension": 1024,
    }
    with MilvusDestination(**init_kwargs) as managed_destination:
        assert managed_destination is not None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_repr(mock_get_pymilvus):
    """repr() shows the db_path and collection name."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    milvus_client.list_collections.return_value = []

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    expected_repr = "<MilvusDestination db_path=milvus.db collection=test_collection>"
    assert repr(destination) == expected_repr
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# ============================================================================
|
|
310
|
+
# 懒加载失败测试
|
|
311
|
+
# ============================================================================
|
|
312
|
+
|
|
313
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_import_error(mock_get_pymilvus):
    """An ImportError from ``_get_pymilvus`` reaches the caller unchanged."""
    # Simulate pymilvus not being installed.
    install_hint = "使用 MilvusDestination 需要安装 pymilvus: pip install xparse-client[milvus]"
    mock_get_pymilvus.side_effect = ImportError(install_hint)

    with pytest.raises(ImportError) as exc_info:
        MilvusDestination(
            db_path="milvus.db",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert "pymilvus" in message
    assert "xparse-client[milvus]" in message
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_collection_creation_error(mock_get_pymilvus):
    """A failing create_collection raises DestinationError."""
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Report the collection as missing.
    milvus_client.has_collection.return_value = False

    # Make create_collection fail.
    milvus_client.create_collection.side_effect = Exception("Schema error")

    with pytest.raises(DestinationError) as exc_info:
        MilvusDestination(
            db_path="milvus.db",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert any(fragment in message for fragment in ("初始化", "创建", "Schema"))
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_list_collections_error(mock_get_pymilvus):
    """A failing has_collection raises DestinationError.

    NOTE(review): the test name mentions list_collections, but the mocked
    failure is on has_collection - confirm which call the implementation uses.
    """
    milvus_client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Make has_collection fail.
    milvus_client.has_collection.side_effect = Exception("Connection error")

    with pytest.raises(DestinationError) as exc_info:
        MilvusDestination(
            db_path="milvus.db",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert any(fragment in message for fragment in ("初始化", "连接", "Connection"))
|