xparse-client 0.2.19__py3-none-any.whl → 0.3.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +188 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +351 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +225 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +132 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b8.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b8.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/top_level.txt +2 -0
  69. xparse_client/pipeline/__init__.py +0 -3
  70. xparse_client/pipeline/config.py +0 -129
  71. xparse_client/pipeline/destinations.py +0 -489
  72. xparse_client/pipeline/pipeline.py +0 -690
  73. xparse_client/pipeline/sources.py +0 -583
  74. xparse_client-0.2.19.dist-info/METADATA +0 -1050
  75. xparse_client-0.2.19.dist-info/RECORD +0 -11
@@ -0,0 +1,399 @@
1
+ """QdrantDestination 测试
2
+
3
+ 测试 Qdrant 向量数据库 Destination。使用 mock 模式测试,无需真实 Qdrant 实例。
4
+
5
+ 运行方式:
6
+ pytest tests/unit/connectors/test_qdrant.py -v
7
+ """
8
+
9
+ from unittest.mock import MagicMock, patch
10
+
11
+ import pytest
12
+
13
+ from xparse_client.connectors import QdrantDestination
14
+ from xparse_client.exceptions import DestinationError
15
+
16
+ # ============================================================================
17
+ # Mock 辅助函数
18
+ # ============================================================================
19
+
20
+ def _setup_qdrant_mock(mock_get_qdrant):
21
+ """设置 Qdrant mock 的辅助函数"""
22
+ mock_qdrant_client_class = MagicMock()
23
+ mock_distance = MagicMock()
24
+ mock_payload_schema = MagicMock()
25
+ mock_point_struct = MagicMock()
26
+ mock_vector_params = MagicMock()
27
+ mock_client_instance = MagicMock()
28
+
29
+ # QdrantClient() 返回 mock 实例
30
+ mock_qdrant_client_class.return_value = mock_client_instance
31
+
32
+ # _get_qdrant() 返回所有需要的类
33
+ mock_get_qdrant.return_value = (
34
+ mock_qdrant_client_class,
35
+ mock_distance,
36
+ mock_payload_schema,
37
+ mock_point_struct,
38
+ mock_vector_params,
39
+ )
40
+
41
+ return (
42
+ mock_client_instance,
43
+ mock_qdrant_client_class,
44
+ mock_distance,
45
+ mock_payload_schema,
46
+ mock_point_struct,
47
+ mock_vector_params,
48
+ )
49
+
50
+
51
+ # ============================================================================
52
+ # QdrantDestination 测试
53
+ # ============================================================================
54
+
55
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_initialization(mock_get_qdrant):
    """The constructor stores its arguments and creates a missing collection."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)

    # Report the collection as absent.
    client.collection_exists.return_value = False

    init_kwargs = {
        "url": "http://localhost:6333",
        "collection_name": "test_collection",
        "dimension": 1024,
    }
    dest = QdrantDestination(**init_kwargs)

    # Each constructor argument is exposed as an attribute of the same name.
    for attr_name, expected in init_kwargs.items():
        assert getattr(dest, attr_name) == expected

    # The missing collection must have been created.
    assert client.create_collection.called
75
+
76
+
77
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_with_api_key(mock_get_qdrant):
    """An ``api_key`` argument reaches the client class as a keyword argument."""
    client, client_class, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = False

    QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
        api_key="test-api-key",
    )

    # Inspect the keyword arguments of the most recent client construction.
    _, client_kwargs = client_class.call_args
    assert client_kwargs["api_key"] == "test-api-key"
93
+
94
+
95
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_prefer_grpc(mock_get_qdrant):
    """``prefer_grpc=True`` reaches the client class as a keyword argument."""
    client, client_class, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = False

    QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
        prefer_grpc=True,
    )

    # Inspect the keyword arguments of the most recent client construction.
    _, client_kwargs = client_class.call_args
    assert client_kwargs["prefer_grpc"] is True
111
+
112
+
113
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_collection_exists(mock_get_qdrant):
    """Constructing against an existing collection must not re-create it."""
    mock_client, _, _, _, _, _ = _setup_qdrant_mock(mock_get_qdrant)

    # Report the collection as already present.
    mock_client.collection_exists.return_value = True

    QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    # An existing collection is reused, so creation must be skipped.
    # NOTE(review): the original comments state that the payload schema is
    # still configured in this branch; that call is not asserted here because
    # its name is not visible from this test file.
    mock_client.create_collection.assert_not_called()
129
+
130
+
131
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_write_with_embeddings(mock_get_qdrant):
    """Writing a record that carries embeddings upserts it into the collection."""
    client, _, _, _, point_struct, _ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = True

    dest = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    record = {
        "element_id": "elem_001",
        "text": "Test text",
        "embeddings": [0.1] * 1024,
        "metadata": {"page": 1},
    }
    file_meta = {"filename": "test.pdf", "record_id": "rec_123"}

    assert dest.write([record], file_meta) is True

    # Exactly one upsert, aimed at the configured collection.
    client.upsert.assert_called_once()
    _, upsert_kwargs = client.upsert.call_args
    assert upsert_kwargs["collection_name"] == "test_collection"

    # Points must have been built through the PointStruct class.
    assert point_struct.called
166
+
167
+
168
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_write_missing_embeddings(mock_get_qdrant):
    """Writing records without embeddings returns False and upserts nothing."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = True

    dest = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    # The only record has no "embeddings" key.
    record_without_vector = {
        "element_id": "elem_001",
        "text": "Test text",
    }
    file_meta = {"filename": "test.pdf", "record_id": "rec_123"}

    # With no usable record, the write reports failure...
    outcome = dest.write([record_without_vector], file_meta)
    assert outcome is False

    # ...and the client is never asked to upsert.
    assert not client.upsert.called
195
+
196
+
197
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_write_wrong_dimension(mock_get_qdrant):
    """A vector of the wrong dimension is still handed to ``upsert``."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = True

    dest = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    # 512 components, although the destination was configured with 1024.
    short_vector = [0.1] * 512
    records = [
        {
            "element_id": "elem_001",
            "text": "Test text",
            "embeddings": short_vector,
        }
    ]
    file_meta = {"filename": "test.pdf", "record_id": "rec_123"}

    # Per the original author's note, the write is still attempted and
    # dimension validation is left to the database.
    outcome = dest.write(records, file_meta)
    assert outcome is True

    # The upsert must have gone through.
    assert client.upsert.called
225
+
226
+
227
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_batch_write(mock_get_qdrant):
    """Ten records are written with one upsert and ten PointStruct calls."""
    client, _, _, _, point_struct, _ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = True

    dest = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    # Build ten records, each with a full-size vector.
    records = []
    for idx in range(10):
        records.append(
            {
                "element_id": f"elem_{idx:03d}",
                "text": f"Text {idx}",
                "embeddings": [0.1 * idx] * 1024,
            }
        )
    file_meta = {"filename": "test.pdf", "record_id": "rec_123"}

    assert dest.write(records, file_meta) is True

    # The whole batch goes out in a single upsert call.
    client.upsert.assert_called_once()

    # One point is constructed per record.
    assert point_struct.call_count == 10
259
+
260
+
261
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_connection_error(mock_get_qdrant):
    """A client constructor failure surfaces as ``DestinationError``."""
    _, client_class, *_ = _setup_qdrant_mock(mock_get_qdrant)

    # Make instantiating the client class blow up.
    client_class.side_effect = Exception("Connection failed")

    with pytest.raises(DestinationError) as exc_info:
        QdrantDestination(
            url="http://invalid-host:6333",
            collection_name="test_collection",
            dimension=1024,
        )

    # The message must mention either initialization or connection.
    message = str(exc_info.value)
    assert any(keyword in message for keyword in ("初始化", "连接"))
277
+
278
+
279
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_write_error(mock_get_qdrant):
    """An ``upsert`` failure during ``write`` surfaces as ``DestinationError``."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = True

    # Make every upsert attempt fail.
    client.upsert.side_effect = Exception("Write failed")

    dest = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    record = {
        "element_id": "elem_001",
        "text": "Test",
        "embeddings": [0.1] * 1024,
    }
    file_meta = {"filename": "test.pdf", "record_id": "rec_123"}

    with pytest.raises(DestinationError) as exc_info:
        dest.write([record], file_meta)

    # The message must mention the write, in either language.
    message = str(exc_info.value)
    assert any(keyword in message for keyword in ("写入", "Write"))
307
+
308
+
309
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_context_manager(mock_get_qdrant):
    """The destination works in a ``with`` statement and binds a non-None value."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = False

    destination = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )
    with destination as dest:
        assert dest is not None
321
+
322
+
323
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_destination_repr(mock_get_qdrant):
    """``repr()`` shows the URL and the collection name."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)
    client.collection_exists.return_value = False

    dest = QdrantDestination(
        url="http://localhost:6333",
        collection_name="test_collection",
        dimension=1024,
    )

    expected = "<QdrantDestination url=http://localhost:6333 collection=test_collection>"
    assert repr(dest) == expected
336
+
337
+
338
+ # ============================================================================
339
+ # 懒加载失败测试
340
+ # ============================================================================
341
+
342
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_import_error(mock_get_qdrant):
    """An ``ImportError`` from the lazy loader propagates out of the constructor."""
    # Simulate qdrant-client not being installed.
    missing_dependency = ImportError(
        "使用 QdrantDestination 需要安装 qdrant-client: pip install xparse-client[qdrant]"
    )
    mock_get_qdrant.side_effect = missing_dependency

    with pytest.raises(ImportError) as exc_info:
        QdrantDestination(
            url="http://localhost:6333",
            collection_name="test_collection",
            dimension=1024,
        )

    # The message names the package and the extra to install.
    message = str(exc_info.value)
    assert "qdrant" in message
    assert "xparse-client[qdrant]" in message
359
+
360
+
361
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_collection_creation_error(mock_get_qdrant):
    """A failing ``create_collection`` call surfaces as ``DestinationError``."""
    # Only the client instance is needed; the other mocks are discarded.
    mock_client, _, _, _, _, _ = _setup_qdrant_mock(mock_get_qdrant)

    # Report the collection as absent so that creation is attempted.
    mock_client.collection_exists.return_value = False

    # Make the creation attempt fail.
    mock_client.create_collection.side_effect = Exception("Schema error")

    with pytest.raises(DestinationError) as exc_info:
        QdrantDestination(
            url="http://localhost:6333",
            collection_name="test_collection",
            dimension=1024,
        )

    # The message must mention either the connection or the schema failure.
    assert "连接" in str(exc_info.value) or "Schema" in str(exc_info.value)
381
+
382
+
383
@patch('xparse_client.connectors.destinations.qdrant._get_qdrant')
def test_qdrant_collection_exists_error(mock_get_qdrant):
    """A failing ``get_collections`` call surfaces as ``DestinationError``."""
    client, *_ = _setup_qdrant_mock(mock_get_qdrant)

    # Make the get_collections call fail.
    client.get_collections.side_effect = Exception("Connection error")

    with pytest.raises(DestinationError) as exc_info:
        QdrantDestination(
            url="http://localhost:6333",
            collection_name="test_collection",
            dimension=1024,
        )

    # The message must mention the connection, in either language.
    message = str(exc_info.value)
    assert any(keyword in message for keyword in ("连接", "Connection"))