xparse-client 0.2.19__py3-none-any.whl → 0.3.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +179 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +350 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +215 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +136 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b3.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +2 -0
  69. xparse_client/pipeline/__init__.py +0 -3
  70. xparse_client/pipeline/config.py +0 -129
  71. xparse_client/pipeline/destinations.py +0 -489
  72. xparse_client/pipeline/pipeline.py +0 -690
  73. xparse_client/pipeline/sources.py +0 -583
  74. xparse_client-0.2.19.dist-info/METADATA +0 -1050
  75. xparse_client-0.2.19.dist-info/RECORD +0 -11
@@ -0,0 +1,324 @@
1
+ """LocalSource 和 LocalDestination 集成测试
2
+
3
+ 测试本地文件系统 Source 和 Destination,包括:
4
+ - LocalSource: 文件列表、文件读取、pattern 匹配、递归搜索
5
+ - LocalDestination: 文件写入、元数据处理、中间结果
6
+ - 错误处理:目录不存在、文件不存在、权限错误
7
+ """
8
+
9
+ import json
10
+
11
+ import pytest
12
+
13
+ from xparse_client.connectors import LocalDestination, LocalSource
14
+ from xparse_client.exceptions import SourceError
15
+
16
+ # ============================================================================
17
+ # LocalSource 测试
18
+ # ============================================================================
19
+
20
+
21
def test_local_source_initialization(tmp_path):
    """A freshly constructed LocalSource exposes its directory and default options."""
    src = LocalSource(directory=str(tmp_path))

    # Defaults: non-recursive, and no pattern (None means "match every file").
    assert src.recursive is False
    assert src.pattern is None
    assert src.directory == tmp_path
28
+
29
+
30
def test_local_source_directory_not_exist():
    """Constructing a LocalSource on a missing directory raises SourceError."""
    missing_dir = "/nonexistent/directory"

    with pytest.raises(SourceError) as exc_info:
        LocalSource(directory=missing_dir)

    error = exc_info.value
    assert "目录不存在" in str(error)
    assert error.connector_type == "local"
37
+
38
+
39
def test_local_source_not_a_directory(tmp_path):
    """Pointing LocalSource at a regular file (not a directory) raises SourceError."""
    # A plain file stands in for the "directory" argument.
    regular_file = tmp_path / "file.txt"
    regular_file.write_text("test")

    with pytest.raises(SourceError) as exc_info:
        LocalSource(directory=str(regular_file))

    message = str(exc_info.value)
    assert "不是目录" in message
49
+
50
+
51
def test_local_source_list_files_empty(tmp_path):
    """Listing an empty directory yields an empty list."""
    listing = LocalSource(directory=str(tmp_path)).list_files()

    assert listing == []
57
+
58
+
59
def test_local_source_list_files_basic(tmp_path):
    """Non-recursive listing returns only files directly under the directory."""
    top_level = {"file1.pdf": "test1", "file2.docx": "test2", "file3.txt": "test3"}
    for name, body in top_level.items():
        (tmp_path / name).write_text(body)

    # A file inside a sub-directory; it must not appear in the listing.
    nested = tmp_path / "subdir"
    nested.mkdir()
    (nested / "file4.pdf").write_text("test4")

    listed = LocalSource(directory=str(tmp_path)).list_files()

    # No recursion requested, so only the three top-level files are expected.
    assert len(listed) == 3
    for name in top_level:
        assert name in listed
    assert "subdir/file4.pdf" not in listed
80
+
81
+
82
def test_local_source_list_files_with_pattern(tmp_path):
    """Only files matching one of the given patterns are listed."""
    fixtures = (("file1.pdf", "test1"), ("file2.docx", "test2"), ("file3.txt", "test3"))
    for name, body in fixtures:
        (tmp_path / name).write_text(body)

    source = LocalSource(directory=str(tmp_path), pattern=["*.pdf", "*.docx"])
    matched = source.list_files()

    assert len(matched) == 2
    assert "file1.pdf" in matched
    assert "file2.docx" in matched
    assert "file3.txt" not in matched
96
+
97
+
98
def test_local_source_list_files_recursive(tmp_path):
    """Recursive listing returns files from nested sub-directories as relative paths."""
    (tmp_path / "file1.pdf").write_text("test1")

    # Two levels of nesting, one file at each level.
    level_one = tmp_path / "subdir"
    level_one.mkdir()
    (level_one / "file2.pdf").write_text("test2")

    level_two = level_one / "subdir2"
    level_two.mkdir()
    (level_two / "file3.pdf").write_text("test3")

    found = LocalSource(directory=str(tmp_path), recursive=True).list_files()

    assert len(found) == 3
    for expected in ("file1.pdf", "subdir/file2.pdf", "subdir/subdir2/file3.pdf"):
        assert expected in found
119
+
120
+
121
def test_local_source_read_file(tmp_path):
    """read_file returns the raw bytes plus a data_source metadata mapping."""
    payload = b"PDF content"
    (tmp_path / "test.pdf").write_bytes(payload)

    reader = LocalSource(directory=str(tmp_path))
    file_bytes, data_source = reader.read_file("test.pdf")

    assert file_bytes == payload
    # Metadata keys the test expects to be present.
    for key in ("url", "version", "date_created", "date_modified"):
        assert key in data_source
    assert data_source["record_locator"]["protocol"] == "file"
137
+
138
+
139
def test_local_source_read_file_not_found(tmp_path):
    """Reading a file that does not exist raises SourceError tagged as 'local'."""
    reader = LocalSource(directory=str(tmp_path))

    with pytest.raises(SourceError) as exc_info:
        reader.read_file("nonexistent.pdf")

    error = exc_info.value
    assert "不存在" in str(error)
    assert error.connector_type == "local"
148
+
149
+
150
def test_local_source_read_file_path_traversal(tmp_path):
    """A relative path that climbs out of the source directory is rejected."""
    reader = LocalSource(directory=str(tmp_path))
    escaping_path = "../../../etc/passwd"

    with pytest.raises(SourceError) as exc_info:
        reader.read_file(escaping_path)

    message = str(exc_info.value)
    assert "非法路径" in message
158
+
159
+
160
def test_local_source_context_manager(tmp_path):
    """LocalSource works in a ``with`` statement and binds a non-None object."""
    manager = LocalSource(directory=str(tmp_path))
    with manager as source:
        assert source is not None
164
+
165
+
166
+ # ============================================================================
167
+ # LocalDestination 测试
168
+ # ============================================================================
169
+
170
+
171
def test_local_destination_initialization(tmp_path):
    """LocalDestination records its output directory, which exists after construction."""
    target_dir = tmp_path / "output"
    destination = LocalDestination(output_dir=str(target_dir))

    assert target_dir.exists()
    assert destination.output_dir == target_dir
178
+
179
+
180
def test_local_destination_create_nested_directory(tmp_path):
    """A multi-level output path that does not yet exist is present after construction."""
    nested_dir = tmp_path / "level1" / "level2" / "output"
    destination = LocalDestination(output_dir=str(nested_dir))

    assert destination.output_dir.exists()
186
+
187
+
188
def test_local_destination_write_basic(tmp_path):
    """write() stores the elements as JSON in a file named after the source filename."""
    destination = LocalDestination(output_dir=str(tmp_path))
    records = [
        {"element_id": "1", "text": "test1"},
        {"element_id": "2", "text": "test2"},
    ]

    outcome = destination.write(records, {"filename": "test.pdf"})
    assert outcome is True

    # "test.pdf" is expected to produce "test.json" in the output directory.
    target = tmp_path / "test.json"
    assert target.exists()

    # The file content must round-trip to the exact input records.
    with open(target, encoding="utf-8") as handle:
        assert json.load(handle) == records
210
+
211
+
212
def test_local_destination_write_with_stage(tmp_path):
    """Intermediate results carry the stage name as a filename suffix."""
    destination = LocalDestination(output_dir=str(tmp_path))
    parsed = [{"element_id": "1", "text": "parsed text"}]
    info = {"filename": "test.pdf", "stage": "parse"}

    assert destination.write(parsed, info) is True

    # The stage ("parse") is expected in the output file name.
    assert (tmp_path / "test_parse.json").exists()
226
+
227
+
228
def test_local_destination_write_without_filename(tmp_path):
    """With no filename in the metadata, the default output name is used."""
    destination = LocalDestination(output_dir=str(tmp_path))
    empty_metadata = {}

    outcome = destination.write([{"element_id": "1"}], empty_metadata)
    assert outcome is True

    # The fallback file name asserted by this test is "output.json".
    fallback = tmp_path / "output.json"
    assert fallback.exists()
242
+
243
+
244
def test_local_destination_context_manager(tmp_path):
    """LocalDestination works in a ``with`` statement and binds a non-None object."""
    manager = LocalDestination(output_dir=str(tmp_path))
    with manager as dest:
        assert dest is not None
248
+
249
+
250
+ # ============================================================================
251
+ # 集成测试
252
+ # ============================================================================
253
+
254
+
255
def test_local_source_and_destination_integration(tmp_path):
    """Round trip: list and read PDFs via LocalSource, write results via LocalDestination."""
    # Input directory with two PDF files.
    inbox = tmp_path / "input"
    inbox.mkdir()
    for name, body in (("file1.pdf", "test1"), ("file2.pdf", "test2")):
        (inbox / name).write_text(body)

    # Output directory; not created here, it is handed to LocalDestination as-is.
    outbox = tmp_path / "output"

    source = LocalSource(directory=str(inbox), pattern=["*.pdf"])
    dest = LocalDestination(output_dir=str(outbox))

    names = source.list_files()
    assert len(names) == 2

    for name in names:
        raw, origin = source.read_file(name)

        # Stand-in for a real processing result: a single element per file.
        element = {
            "element_id": "1",
            "text": raw.decode("utf-8"),
            "data_source": origin,
        }
        dest.write([element], {"filename": name})

    # One JSON file per input file is expected.
    written = list(outbox.glob("*.json"))
    assert len(written) == 2
291
+
292
+
293
def test_local_source_read_file_permission_error(tmp_path):
    """read_file surfaces a PermissionError raised by ``open`` as a SourceError.

    ``builtins.open`` is patched to raise ``PermissionError``; the test accepts
    either the connector's own message or the original "Permission" text.
    """
    # Function-scope import: this test module has no top-level mock import.
    # SourceError is already imported at module level, so it is not re-imported.
    from unittest.mock import patch

    # The file must really exist so the failure comes from the patched open().
    target = tmp_path / "test.txt"
    target.write_text("test content")

    source = LocalSource(directory=str(tmp_path))

    with patch('builtins.open', side_effect=PermissionError("Permission denied")):
        with pytest.raises(SourceError) as exc_info:
            source.read_file("test.txt")

    message = str(exc_info.value)
    assert "无权限读取文件" in message or "Permission" in message
310
+
311
+
312
def test_local_source_read_file_general_error(tmp_path):
    """read_file surfaces an unexpected exception from ``open`` as a SourceError.

    ``builtins.open`` is patched to raise a plain ``Exception``; the test accepts
    either the connector's generic read-failure message or the original text.
    """
    # Function-scope import: this test module has no top-level mock import.
    # SourceError is already imported at module level, so it is not re-imported.
    from unittest.mock import patch

    # Create the file (as the permission-error test does) so the error under
    # test is the patched open() failure and not a missing-file condition.
    target = tmp_path / "test.txt"
    target.write_text("test content")

    source = LocalSource(directory=str(tmp_path))

    with patch('builtins.open', side_effect=Exception("Unknown error")):
        with pytest.raises(SourceError) as exc_info:
            source.read_file("test.txt")

    message = str(exc_info.value)
    assert "读取文件失败" in message or "Unknown" in message
@@ -0,0 +1,368 @@
1
+ """MilvusDestination 测试
2
+
3
+ 测试 Milvus 向量数据库 Destination。使用 mock 模式测试,无需真实 Milvus 实例。
4
+
5
+ 运行方式:
6
+ pytest tests/unit/connectors/test_milvus.py -v
7
+ """
8
+
9
+ from unittest.mock import MagicMock, patch
10
+
11
+ import pytest
12
+
13
+ from xparse_client.connectors import MilvusDestination
14
+ from xparse_client.exceptions import DestinationError
15
+
16
+ # ============================================================================
17
+ # Mock 辅助函数
18
+ # ============================================================================
19
+
20
+ def _setup_milvus_mock(mock_get_pymilvus):
21
+ """设置 Milvus mock 的辅助函数"""
22
+ mock_milvus_client_class = MagicMock()
23
+ mock_data_type = MagicMock()
24
+ mock_client_instance = MagicMock()
25
+
26
+ # MilvusClient() 返回 mock 实例
27
+ mock_milvus_client_class.return_value = mock_client_instance
28
+
29
+ # _get_pymilvus() 返回 (MilvusClient, DataType)
30
+ mock_get_pymilvus.return_value = (mock_milvus_client_class, mock_data_type)
31
+
32
+ return mock_client_instance, mock_milvus_client_class, mock_data_type
33
+
34
+
35
+ # ============================================================================
36
+ # MilvusDestination 测试
37
+ # ============================================================================
38
+
39
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_initialization(mock_get_pymilvus):
    """MilvusDestination keeps the constructor arguments as attributes."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # No collections reported by list_collections.
    client.list_collections.return_value = []

    settings = {
        "db_path": "milvus.db",
        "collection_name": "test_collection",
        "dimension": 1024,
    }
    dest = MilvusDestination(**settings)

    assert dest.db_path == "milvus.db"
    assert dest.collection_name == "test_collection"
    assert dest.dimension == 1024

    # Original author's note: a missing collection should be created, and since
    # list_collections is mocked to return an empty list the creation logic would
    # need checking; the real implementation creates the collection when absent.
    # NOTE(review): nothing here asserts on create_collection — confirm which
    # existence check the implementation uses before adding such an assertion.
60
+
61
+
62
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_with_api_key(mock_get_pymilvus):
    """The api_key argument is forwarded to the Milvus client as ``token``."""
    client, client_class, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = []

    MilvusDestination(
        db_path="https://zilliz-cloud.com",
        collection_name="test_collection",
        dimension=1024,
        api_key="test-api-key",
    )

    # Inspect the keyword arguments of the client constructor call.
    _, init_kwargs = client_class.call_args
    assert init_kwargs["token"] == "test-api-key"
78
+
79
+
80
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_collection_exists(mock_get_pymilvus):
    """An already existing collection is not created again."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Report the target collection as already present.
    client.list_collections.return_value = ["test_collection"]

    MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # No new collection should have been created.
    client.create_collection.assert_not_called()
96
+
97
+
98
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_with_embeddings(mock_get_pymilvus):
    """Elements carrying embeddings are inserted with element and record ids."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    element = {
        "element_id": "elem_001",
        "text": "Test text",
        "embeddings": [0.1] * 1024,
        "metadata": {"page": 1},
    }
    file_info = {"filename": "test.pdf", "record_id": "rec_123"}

    outcome = destination.write([element], file_info)
    assert outcome is True

    # Exactly one insert call, addressed to the configured collection.
    client.insert.assert_called_once()
    _, insert_kwargs = client.insert.call_args
    assert insert_kwargs["collection_name"] == "test_collection"

    # The inserted row must carry the required fields.
    rows = insert_kwargs["data"]
    assert len(rows) == 1
    row = rows[0]
    assert row["element_id"] == "elem_001"
    assert row["record_id"] == "rec_123"
    assert len(row["embeddings"]) == 1024
137
+
138
+
139
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_missing_embeddings(mock_get_pymilvus):
    """Writing elements without embeddings returns False and inserts nothing."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # This element deliberately has no "embeddings" key.
    element = {"element_id": "elem_001", "text": "Test text"}
    file_info = {"filename": "test.pdf", "record_id": "rec_123"}

    # With no valid rows, write() is expected to report failure ...
    outcome = destination.write([element], file_info)
    assert outcome is False

    # ... and never reach the client's insert().
    client.insert.assert_not_called()
166
+
167
+
168
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_wrong_dimension(mock_get_pymilvus):
    """A vector of the wrong dimension is still passed on to insert()."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # 512 values although the destination was configured for 1024.
    short_vector = [0.1] * 512
    element = {
        "element_id": "elem_001",
        "text": "Test text",
        "embeddings": short_vector,
    }
    file_info = {"filename": "test.pdf", "record_id": "rec_123"}

    # Per the original note, dimension checking is left to the database layer,
    # so the write is attempted and reported as successful against the mock.
    outcome = destination.write([element], file_info)
    assert outcome is True

    client.insert.assert_called()
196
+
197
+
198
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_batch_write(mock_get_pymilvus):
    """Ten elements are written with a single insert() call."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = ["test_collection"]

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    # Build ten elements, each with a constant-valued 1024-dim vector.
    batch = []
    for i in range(10):
        batch.append(
            {
                "element_id": f"elem_{i:03d}",
                "text": f"Text {i}",
                "embeddings": [0.1 * i] * 1024,
            }
        )
    file_info = {"filename": "test.pdf", "record_id": "rec_123"}

    outcome = destination.write(batch, file_info)
    assert outcome is True

    # One call carrying all ten rows.
    client.insert.assert_called_once()
    _, insert_kwargs = client.insert.call_args
    assert len(insert_kwargs["data"]) == 10
230
+
231
+
232
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_connection_error(mock_get_pymilvus):
    """A failure while constructing the Milvus client surfaces as DestinationError."""
    _, client_class, _ = _setup_milvus_mock(mock_get_pymilvus)

    # Make instantiating the client itself blow up.
    client_class.side_effect = Exception("Connection failed")

    with pytest.raises(DestinationError) as exc_info:
        MilvusDestination(
            db_path="invalid-path",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert "初始化" in message or "连接" in message
248
+
249
+
250
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_write_error(mock_get_pymilvus):
    """An exception raised by insert() surfaces as DestinationError."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = ["test_collection"]

    # Every insert attempt fails.
    client.insert.side_effect = Exception("Write failed")

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    element = {
        "element_id": "elem_001",
        "text": "Test",
        "embeddings": [0.1] * 1024,
    }
    file_info = {"filename": "test.pdf", "record_id": "rec_123"}

    with pytest.raises(DestinationError) as exc_info:
        destination.write([element], file_info)

    message = str(exc_info.value)
    assert "写入" in message or "Write" in message
278
+
279
+
280
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_context_manager(mock_get_pymilvus):
    """MilvusDestination works in a ``with`` statement and binds a non-None object."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = []

    manager = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )
    with manager as dest:
        assert dest is not None
292
+
293
+
294
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_destination_repr(mock_get_pymilvus):
    """repr() shows the db path and the collection name."""
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)
    client.list_collections.return_value = []

    destination = MilvusDestination(
        db_path="milvus.db",
        collection_name="test_collection",
        dimension=1024,
    )

    expected = "<MilvusDestination db_path=milvus.db collection=test_collection>"
    assert repr(destination) == expected
307
+
308
+
309
+ # ============================================================================
310
+ # 懒加载失败测试
311
+ # ============================================================================
312
+
313
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_import_error(mock_get_pymilvus):
    """An ImportError from the lazy pymilvus loader propagates out of the constructor."""
    # Simulate pymilvus not being installed.
    install_hint = "使用 MilvusDestination 需要安装 pymilvus: pip install xparse-client[milvus]"
    mock_get_pymilvus.side_effect = ImportError(install_hint)

    with pytest.raises(ImportError) as exc_info:
        MilvusDestination(
            db_path="milvus.db",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert "pymilvus" in message
    assert "xparse-client[milvus]" in message
330
+
331
+
332
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_collection_creation_error(mock_get_pymilvus):
    """A create_collection failure during construction surfaces as DestinationError."""
    client, _, mock_data_type = _setup_milvus_mock(mock_get_pymilvus)

    # Report the collection as absent ...
    client.has_collection.return_value = False

    # ... and make the attempt to create it fail.
    client.create_collection.side_effect = Exception("Schema error")

    with pytest.raises(DestinationError) as exc_info:
        MilvusDestination(
            db_path="milvus.db",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert "初始化" in message or "创建" in message or "Schema" in message
351
+
352
+
353
@patch('xparse_client.connectors.destinations.milvus._get_pymilvus')
def test_milvus_list_collections_error(mock_get_pymilvus):
    """A has_collection failure during construction surfaces as DestinationError.

    Despite the test name, the call that is made to fail is ``has_collection``.
    """
    client, _, _ = _setup_milvus_mock(mock_get_pymilvus)

    # The existence check itself raises.
    client.has_collection.side_effect = Exception("Connection error")

    with pytest.raises(DestinationError) as exc_info:
        MilvusDestination(
            db_path="milvus.db",
            collection_name="test_collection",
            dimension=1024,
        )

    message = str(exc_info.value)
    assert "初始化" in message or "连接" in message or "Connection" in message