xparse-client 0.2.11__py3-none-any.whl → 0.3.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +179 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +350 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +215 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +136 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b3.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +1 -0
  69. example/run_pipeline.py +0 -506
  70. example/run_pipeline_test.py +0 -458
  71. xparse_client/pipeline/__init__.py +0 -3
  72. xparse_client/pipeline/config.py +0 -129
  73. xparse_client/pipeline/destinations.py +0 -487
  74. xparse_client/pipeline/pipeline.py +0 -622
  75. xparse_client/pipeline/sources.py +0 -585
  76. xparse_client-0.2.11.dist-info/METADATA +0 -1050
  77. xparse_client-0.2.11.dist-info/RECORD +0 -13
@@ -0,0 +1,442 @@
1
+ """SmbSource 测试
2
+
3
+ 测试 SMB/CIFS 数据源连接器。使用 mock 模式测试,无需真实 SMB 服务器。
4
+
5
+ 运行方式:
6
+ pytest tests/unit/connectors/test_smb.py -v
7
+ """
8
+
9
+ from datetime import datetime
10
+ from unittest.mock import MagicMock, Mock, patch
11
+
12
+ import pytest
13
+
14
+ from xparse_client.connectors import SmbSource
15
+ from xparse_client.exceptions import SourceError
16
+
17
+ # ============================================================================
18
+ # Mock SMB 测试
19
+ # ============================================================================
20
+
21
def _setup_smb_mock(mock_get_smb):
    """Wire a fake SMB connection class into the patched factory.

    ``mock_get_smb`` is the patched ``_get_smb_connection``. After this call
    it returns a mock class, and calling that class yields a mock instance.

    Returns:
        (mock_smb_instance, mock_smb_class)
    """
    connection = MagicMock()
    connection_cls = MagicMock(return_value=connection)
    mock_get_smb.return_value = connection_cls
    return connection, connection_cls
32
+
33
+
34
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_initialization(mock_get_smb):
    """Check the stored attributes and the connect call after construction."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    init_kwargs = {
        "host": "192.168.1.100",
        "share_name": "documents",
        "username": "testuser",
        "password": "testpass",
        "domain": "WORKGROUP",
        "port": 445,
        "path": "/reports",
    }
    source = SmbSource(**init_kwargs)

    # Explicit arguments are stored; ``path`` is expected without its
    # leading slash.
    assert source.host == "192.168.1.100"
    assert source.share_name == "documents"
    assert source.username == "testuser"
    assert source.domain == "WORKGROUP"
    assert source.port == 445
    assert source.path == "reports"

    # Arguments that were not supplied fall back to their defaults.
    assert source.pattern is None
    assert source.recursive is False

    # Construction must have opened the connection exactly once.
    connection.connect.assert_called_once_with("192.168.1.100", 445)
60
+
61
+
62
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_list_files_basic(mock_get_smb):
    """Non-recursive listing returns only the plain files."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    # (filename, isDirectory) for every entry the fake share reports,
    # including a sub-directory and the "." / ".." pseudo entries.
    listing = (
        ('file1.pdf', False),
        ('file2.docx', False),
        ('subdir', True),
        ('.', True),
        ('..', True),
    )
    connection.listPath.return_value = [
        Mock(filename=entry_name, isDirectory=is_dir)
        for entry_name, is_dir in listing
    ]

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
        recursive=False,
    )

    files = source.list_files()

    # Only the two regular files survive; directories and "." / ".." do not.
    assert len(files) == 2
    assert 'file1.pdf' in files
    assert 'file2.docx' in files
    assert 'subdir' not in files
107
+
108
+
109
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_list_files_with_pattern(mock_get_smb):
    """Listing with a ``pattern`` keeps only the matching file names."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    pdf_entry = Mock(filename='file1.pdf', isDirectory=False)
    docx_entry = Mock(filename='file2.docx', isDirectory=False)
    connection.listPath.return_value = [pdf_entry, docx_entry]

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
        pattern=["*.pdf"],
    )

    files = source.list_files()

    # Only the PDF matches "*.pdf".
    assert len(files) == 1
    assert 'file1.pdf' in files
    assert 'file2.docx' not in files
137
+
138
+
139
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_list_files_recursive(mock_get_smb):
    """Recursive listing descends into sub-directories."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    # Entries at the share root: one file and one directory.
    top_file = Mock(filename='file1.pdf', isDirectory=False)
    sub_dir = Mock(filename='subdir', isDirectory=True)

    # Single file located inside the sub-directory.
    nested_file = Mock(filename='file2.pdf', isDirectory=False)

    def fake_list_path(share, path):
        # Serve a different listing depending on the requested directory.
        if path == '/':
            return [top_file, sub_dir]
        return [nested_file] if 'subdir' in path else []

    connection.listPath.side_effect = fake_list_path

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
        recursive=True,
    )

    files = source.list_files()

    # Files from both directory levels are reported, nested ones with
    # their relative path.
    assert len(files) == 2
    assert 'file1.pdf' in files
    assert 'subdir/file2.pdf' in files
182
+
183
+
184
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_list_files_ignore_hidden(mock_get_smb):
    """Dot-prefixed (hidden) files are left out of the listing."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    connection.listPath.return_value = [
        Mock(filename='visible.pdf', isDirectory=False),
        Mock(filename='.hidden', isDirectory=False),
    ]

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
    )

    files = source.list_files()

    # The hidden entry must have been filtered out.
    assert len(files) == 1
    assert 'visible.pdf' in files
    assert '.hidden' not in files
212
+
213
+
214
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_read_file(mock_get_smb):
    """read_file returns the file bytes plus SMB data-source metadata."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    payload = b'SMB file content'

    def fake_retrieve_file(share, path, file_obj):
        # Stand-in for retrieveFile: write the payload into the buffer
        # supplied by the caller.
        file_obj.write(payload)

    connection.retrieveFile.side_effect = fake_retrieve_file

    # Attributes object handed back by getAttributes.
    connection.getAttributes.return_value = Mock(
        create_time=datetime(2024, 1, 28, 12, 0, 0),
        last_write_time=datetime(2024, 1, 28, 14, 0, 0),
    )

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
    )

    file_bytes, data_source = source.read_file('test.pdf')

    assert file_bytes == payload
    assert 'smb://192.168.1.100/documents/' in data_source['url']

    locator = data_source['record_locator']
    assert locator['protocol'] == 'smb'
    assert locator['server'] == '192.168.1.100'
    assert locator['share'] == 'documents'

    for required_key in ('version', 'date_created', 'date_modified'):
        assert required_key in data_source
250
+
251
+
252
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_read_file_with_path(mock_get_smb):
    """read_file must request the file underneath the configured base path.

    The source is created with ``path="/reports"``; every path handed to the
    mocked ``retrieveFile`` is recorded and checked after the call.
    """
    mock_smb, _ = _setup_smb_mock(mock_get_smb)

    test_content = b'Content'
    requested_paths = []

    def mock_retrieve_file(share, path, file_obj):
        # Only record the path here. An assertion raised inside this callback
        # would travel through read_file's own error handling (presumably
        # wrapped as SourceError, as in the not-found test) instead of
        # surfacing as a plain assertion failure.
        requested_paths.append(path)
        file_obj.write(test_content)

    mock_smb.retrieveFile.side_effect = mock_retrieve_file

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
        path="/reports",
    )

    file_bytes, _ = source.read_file('test.pdf')

    assert file_bytes == test_content

    # The base path must be part of every path requested from the share.
    assert requested_paths, "retrieveFile was never called"
    assert all('/reports/' in requested for requested in requested_paths)
277
+
278
+
279
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_read_file_not_found(mock_get_smb):
    """A failing retrieveFile is reported as SourceError by read_file."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    # Make the download itself blow up.
    connection.retrieveFile.side_effect = Exception("File not found")

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
    )

    with pytest.raises(SourceError) as raised:
        source.read_file('nonexistent.pdf')

    message = str(raised.value)
    assert "读取 SMB 文件失败" in message
298
+
299
+
300
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_connection_error(mock_get_smb):
    """A connect failure during construction surfaces as SourceError."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    # Simulate an unreachable server.
    connection.connect.side_effect = Exception("Connection refused")

    init_kwargs = {
        "host": "invalid-host",
        "share_name": "documents",
        "username": "test",
        "password": "test",
    }
    with pytest.raises(SourceError) as raised:
        SmbSource(**init_kwargs)

    message = str(raised.value)
    assert "SMB 连接失败" in message
317
+
318
+
319
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_authentication_error(mock_get_smb):
    """An authentication failure in connect surfaces as SourceError."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    # Simulate the server rejecting the credentials.
    connection.connect.side_effect = Exception("Authentication failed")

    init_kwargs = {
        "host": "192.168.1.100",
        "share_name": "documents",
        "username": "wrong",
        "password": "wrong",
    }
    with pytest.raises(SourceError) as raised:
        SmbSource(**init_kwargs)

    message = str(raised.value)
    assert "SMB 连接失败" in message
336
+
337
+
338
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_context_manager(mock_get_smb):
    """Leaving the ``with`` block closes the underlying connection."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    managed = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
    )
    with managed as source:
        assert source is not None

    # Exiting the context must have closed the connection exactly once.
    connection.close.assert_called_once()
353
+
354
+
355
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_close(mock_get_smb):
    """An explicit close() call closes the underlying connection."""
    connection, _ = _setup_smb_mock(mock_get_smb)

    init_kwargs = {
        "host": "192.168.1.100",
        "share_name": "documents",
        "username": "test",
        "password": "test",
    }
    SmbSource(**init_kwargs).close()

    # close() must have been forwarded to the connection exactly once.
    connection.close.assert_called_once()
371
+
372
+
373
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_custom_parameters(mock_get_smb):
    """Non-default arguments are stored and passed to the connection class."""
    _, connection_cls = _setup_smb_mock(mock_get_smb)

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
        domain="COMPANY",
        port=1445,
        path="/files",
        pattern=["*.pdf", "*.docx"],
        recursive=True,
    )

    assert source.host == "192.168.1.100"
    assert source.share_name == "documents"
    assert source.domain == "COMPANY"
    assert source.port == 1445
    assert source.path == "files"
    assert source.pattern == ["*.pdf", "*.docx"]
    assert source.recursive is True

    # The connection class is instantiated once, with the expected keywords.
    connection_cls.assert_called_once()
    _, ctor_kwargs = connection_cls.call_args
    assert ctor_kwargs['domain'] == "COMPANY"
    assert ctor_kwargs['use_ntlm_v2'] is True
403
+
404
+
405
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_repr(mock_get_smb):
    """``repr`` shows the host and share name."""
    _setup_smb_mock(mock_get_smb)

    init_kwargs = {
        "host": "192.168.1.100",
        "share_name": "documents",
        "username": "test",
        "password": "test",
    }
    source = SmbSource(**init_kwargs)

    expected = "<SmbSource host=192.168.1.100 share=documents>"
    assert repr(source) == expected
418
+
419
+
420
@patch('xparse_client.connectors.sources.smb._get_smb_connection')
def test_smb_source_to_timestamp(mock_get_smb):
    """Exercise ``_to_timestamp`` with None, datetime and numeric input."""
    _setup_smb_mock(mock_get_smb)

    source = SmbSource(
        host="192.168.1.100",
        share_name="documents",
        username="test",
        password="test",
    )

    # None is passed through unchanged.
    assert source._to_timestamp(None) is None

    # A datetime is converted to a float.
    moment = datetime(2024, 1, 28, 12, 0, 0)
    converted = source._to_timestamp(moment)
    assert isinstance(converted, float)

    # Numeric input comes back as the equivalent float.
    numeric_cases = (
        (1234567890, 1234567890.0),
        (1234567890.123, 1234567890.123),
    )
    for raw_value, expected in numeric_cases:
        assert source._to_timestamp(raw_value) == expected