xparse-client 0.2.19__py3-none-any.whl → 0.3.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +179 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +350 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +215 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +136 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b3.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +2 -0
  69. xparse_client/pipeline/__init__.py +0 -3
  70. xparse_client/pipeline/config.py +0 -129
  71. xparse_client/pipeline/destinations.py +0 -489
  72. xparse_client/pipeline/pipeline.py +0 -690
  73. xparse_client/pipeline/sources.py +0 -583
  74. xparse_client-0.2.19.dist-info/METADATA +0 -1050
  75. xparse_client-0.2.19.dist-info/RECORD +0 -11
@@ -0,0 +1,598 @@
1
+ """S3Source 和 S3Destination 测试
2
+
3
+ 测试 S3/MinIO connectors。使用 mock 模式测试,无需真实 S3 服务器或 moto 库。
4
+
5
+ 运行方式:
6
+ pytest tests/unit/connectors/test_s3.py -v
7
+ """
8
+
9
+ import json
10
+ from unittest.mock import MagicMock, patch
11
+
12
+ import pytest
13
+
14
+ from xparse_client.connectors import S3Destination, S3Source
15
+ from xparse_client.exceptions import DestinationError, SourceError
16
+
17
+ # ============================================================================
18
+ # Mock 辅助函数
19
+ # ============================================================================
20
+
21
def _setup_s3_mock(mock_get_boto3):
    """Wire a patched ``_get_boto3`` loader so it hands back mock boto3 objects.

    Args:
        mock_get_boto3: The mock standing in for a connector module's
            ``_get_boto3`` function.

    Returns:
        A ``(client, boto3, config_class)`` tuple of ``MagicMock`` objects,
        where ``client`` is the value returned by ``boto3.client()``.
    """
    fake_client = MagicMock()
    fake_config_cls = MagicMock()
    fake_boto3 = MagicMock()

    # Any call to boto3.client(...) yields the shared fake client.
    fake_boto3.client.return_value = fake_client

    # The loader returns the pair (boto3, Config).
    mock_get_boto3.return_value = (fake_boto3, fake_config_cls)

    return fake_client, fake_boto3, fake_config_cls
34
+
35
+
36
+ # ============================================================================
37
+ # S3Source 测试
38
+ # ============================================================================
39
+
40
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_initialization(mock_get_boto3):
    """Constructing an S3Source keeps its settings and probes the bucket once."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # The bucket probe succeeds.
    s3_client.head_bucket.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
        "prefix": "documents/",
        "region": "us-west-2",
    }
    source = S3Source(**init_kwargs)

    # Explicitly passed settings are exposed unchanged.
    for attr in ("endpoint", "bucket", "prefix"):
        assert getattr(source, attr) == init_kwargs[attr]

    # pattern and recursive were not passed.
    assert source.pattern is None
    assert source.recursive is False

    # head_bucket must have been called exactly once for the bucket.
    s3_client.head_bucket.assert_called_once_with(Bucket="test-bucket")
65
+
66
+
67
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_list_files_basic(mock_get_boto3):
    """Non-recursive listing returns file keys and drops the directory entry."""
    s3_client, _boto3, _config = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # A single result page; the key ending in "/" represents a directory.
    page = {
        "Contents": [
            {"Key": key} for key in ("file1.pdf", "file2.docx", "subdir/")
        ]
    }
    paginator = MagicMock()
    paginator.paginate.return_value = [page]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        recursive=False,
    )
    listed = source.list_files()

    # The directory entry should have been filtered out.
    assert len(listed) == 2
    for expected_key in ("file1.pdf", "file2.docx"):
        assert expected_key in listed
102
+
103
+
104
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_list_files_with_prefix(mock_get_boto3):
    """Listing with a prefix forwards that prefix to the paginator."""
    s3_client, _boto3, _config = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    paginator = MagicMock()
    paginator.paginate.return_value = [
        {
            "Contents": [
                {"Key": "documents/doc1.pdf"},
                {"Key": "documents/doc2.docx"},
            ]
        }
    ]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        prefix="documents/",
    )
    listed = source.list_files()

    # The paginator must have been invoked with the configured prefix.
    _args, paginate_kwargs = paginator.paginate.call_args
    assert paginate_kwargs["Prefix"] == "documents/"

    assert len(listed) == 2
137
+
138
+
139
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_list_files_with_pattern(mock_get_boto3):
    """Listing with a pattern keeps only the keys that match it."""
    s3_client, _boto3, _config = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # Three objects with different extensions; only one is a PDF.
    page = {
        "Contents": [
            {"Key": key} for key in ("file1.pdf", "file2.docx", "file3.txt")
        ]
    }
    paginator = MagicMock()
    paginator.paginate.return_value = [page]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        pattern=["*.pdf"],
    )
    listed = source.list_files()

    assert len(listed) == 1
    assert "file1.pdf" in listed
170
+
171
+
172
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_list_files_recursive(mock_get_boto3):
    """Recursive listing returns nested keys and passes no Delimiter."""
    s3_client, _boto3, _config = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    nested_keys = ("file1.pdf", "subdir/file2.pdf", "subdir/subdir2/file3.pdf")
    paginator = MagicMock()
    paginator.paginate.return_value = [
        {"Contents": [{"Key": key} for key in nested_keys]}
    ]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        recursive=True,
    )
    listed = source.list_files()

    # Recursive mode should not pass a Delimiter to the paginator.
    _args, paginate_kwargs = paginator.paginate.call_args
    assert "Delimiter" not in paginate_kwargs

    assert len(listed) == 3
206
+
207
+
208
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_read_file(mock_get_boto3):
    """Reading an object returns its bytes plus data-source metadata."""
    s3_client, _boto3, _config = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # Fake get_object response: a readable body and HTTP response headers.
    payload = b'S3 file content'
    body = MagicMock()
    body.read.return_value = payload
    http_headers = {
        "etag": '"abc123"',
        "last-modified": "Mon, 28 Jan 2024 12:00:00 GMT",
        "server": "AmazonS3",
    }
    s3_client.get_object.return_value = {
        "Body": body,
        "ResponseMetadata": {"HTTPHeaders": http_headers},
    }

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    content, info = source.read_file('test.pdf')

    assert content == payload
    assert info['url'] == 's3://test-bucket/test.pdf'
    # The version is the ETag without its surrounding quotes.
    assert info['version'] == 'abc123'
    assert info['record_locator']['protocol'] == 's3'
    assert 'date_modified' in info
244
+
245
+
246
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_read_file_not_found(mock_get_boto3):
    """Reading a missing object raises SourceError.

    ``get_object`` is made to raise the client's ``NoSuchKey`` exception, and
    the resulting ``SourceError`` message is checked for the
    "S3 对象不存在" text.
    """
    mock_client, _, _ = _setup_s3_mock(mock_get_boto3)
    mock_client.head_bucket.return_value = {}

    # Install a real exception class as client.exceptions.NoSuchKey so that
    # it can be raised here and caught by an ``except`` clause.
    mock_client.exceptions.NoSuchKey = type('NoSuchKey', (Exception,), {})
    mock_client.get_object.side_effect = mock_client.exceptions.NoSuchKey("Not found")

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket"
    )

    with pytest.raises(SourceError) as exc_info:
        source.read_file('nonexistent.pdf')

    assert "S3 对象不存在" in str(exc_info.value)
268
+
269
+
270
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_connection_error(mock_get_boto3):
    """A failing bucket probe during construction raises SourceError."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # Make the head_bucket probe fail.
    s3_client.head_bucket.side_effect = Exception("Connection refused")

    init_kwargs = {
        "endpoint": "https://invalid-endpoint.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
    }
    with pytest.raises(SourceError) as exc_info:
        S3Source(**init_kwargs)

    message = str(exc_info.value)
    assert "S3 连接失败" in message
287
+
288
+
289
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_context_manager(mock_get_boto3):
    """S3Source can be used in a ``with`` statement and yields an object."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}

    managed_source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )
    with managed_source as source:
        assert source is not None
302
+
303
+
304
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_repr(mock_get_boto3):
    """``repr`` of an S3Source shows its endpoint and bucket."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    expected = "<S3Source endpoint=https://s3.amazonaws.com bucket=test-bucket>"
    assert repr(source) == expected
318
+
319
+
320
+ # ============================================================================
321
+ # S3Destination 测试
322
+ # ============================================================================
323
+
324
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_initialization(mock_get_boto3):
    """Constructing an S3Destination stores settings and performs a probe write."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # The bucket probe, the write and the delete all succeed.
    for operation in ("head_bucket", "put_object", "delete_object"):
        getattr(s3_client, operation).return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        prefix="output/",
    )

    assert dest.endpoint == "https://s3.amazonaws.com"
    assert dest.bucket == "test-bucket"
    # Note: the trailing "/" of the prefix is stripped.
    assert dest.prefix == "output"

    # A write must already have happened during construction.
    assert s3_client.put_object.called
348
+
349
+
350
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_write(mock_get_boto3):
    """Writing elements uploads them as a JSON object named after the file."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    for operation in ("head_bucket", "put_object", "delete_object"):
        getattr(s3_client, operation).return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    elements = [
        {"element_id": "1", "text": "test1"},
        {"element_id": "2", "text": "test2"},
    ]
    outcome = dest.write(elements, {"filename": "test.pdf"})

    assert outcome is True

    # put_object runs at least twice: the probe write plus the real write.
    assert s3_client.put_object.call_count >= 2

    # Inspect the keyword arguments of the most recent put_object call.
    _args, put_kwargs = s3_client.put_object.call_args_list[-1]
    assert put_kwargs["Bucket"] == "test-bucket"
    assert put_kwargs["Key"] == "test.json"
    assert put_kwargs["ContentType"] == "application/json"

    # The uploaded body decodes back to the elements that were written.
    uploaded = json.loads(put_kwargs["Body"].decode("utf-8"))
    assert len(uploaded) == 2
    assert uploaded[0]["element_id"] == "1"
388
+
389
+
390
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_write_with_prefix(mock_get_boto3):
    """With a prefix configured, the uploaded key is placed under that prefix."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    for operation in ("head_bucket", "put_object", "delete_object"):
        getattr(s3_client, operation).return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        prefix="output/",
    )

    outcome = dest.write([{"element_id": "1"}], {"filename": "test.pdf"})

    assert outcome is True

    # The key of the last upload must include the prefix.
    _args, put_kwargs = s3_client.put_object.call_args_list[-1]
    assert put_kwargs["Key"] == "output/test.json"
416
+
417
+
418
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_write_error(mock_get_boto3):
    """A put_object failure during write surfaces as DestinationError."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}
    s3_client.delete_object.return_value = {}

    # First put_object call (the probe write) succeeds; the second one raises.
    upload_failure = Exception("Write failed")
    s3_client.put_object.side_effect = [{}, upload_failure]

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    with pytest.raises(DestinationError) as exc_info:
        dest.write([{"element_id": "1"}], {"filename": "test.pdf"})

    message = str(exc_info.value)
    assert "写入 S3 失败" in message
442
+
443
+
444
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_connection_error(mock_get_boto3):
    """A failing bucket probe during construction raises DestinationError."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # Make the head_bucket probe fail.
    s3_client.head_bucket.side_effect = Exception("Bucket not found")

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "nonexistent-bucket",
    }
    with pytest.raises(DestinationError) as exc_info:
        S3Destination(**init_kwargs)

    message = str(exc_info.value)
    assert "S3 连接或写入测试失败" in message
461
+
462
+
463
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_context_manager(mock_get_boto3):
    """S3Destination can be used in a ``with`` statement and yields an object."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    for operation in ("head_bucket", "put_object", "delete_object"):
        getattr(s3_client, operation).return_value = {}

    managed_dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )
    with managed_dest as dest:
        assert dest is not None
478
+
479
+
480
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_repr(mock_get_boto3):
    """``repr`` of an S3Destination shows its endpoint and bucket."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    for operation in ("head_bucket", "put_object", "delete_object"):
        getattr(s3_client, operation).return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    expected = "<S3Destination endpoint=https://s3.amazonaws.com bucket=test-bucket>"
    assert repr(dest) == expected
496
+
497
+
498
+ # ============================================================================
499
+ # 懒加载失败测试
500
+ # ============================================================================
501
+
502
@patch("xparse_client.connectors.sources.s3._get_boto3")
def test_s3_source_import_error(mock_get_boto3):
    """An ImportError from the boto3 loader propagates out of S3Source()."""
    # Simulate the loader failing with an ImportError.
    loader_failure = ImportError(
        "使用 S3Source 需要安装 boto3: pip install xparse-client[s3]"
    )
    mock_get_boto3.side_effect = loader_failure

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "bucket": "test-bucket",
        "access_key": "test-key",
        "secret_key": "test-secret",
    }
    with pytest.raises(ImportError) as exc_info:
        S3Source(**init_kwargs)

    # The message names the missing package and the install extra.
    message = str(exc_info.value)
    assert "boto3" in message
    assert "xparse-client[s3]" in message
520
+
521
+
522
@patch("xparse_client.connectors.destinations.s3._get_boto3")
def test_s3_destination_import_error(mock_get_boto3):
    """An ImportError from the boto3 loader propagates out of S3Destination()."""
    # Simulate the loader failing with an ImportError.
    loader_failure = ImportError(
        "使用 S3Destination 需要安装 boto3: pip install xparse-client[s3]"
    )
    mock_get_boto3.side_effect = loader_failure

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "bucket": "test-bucket",
        "access_key": "test-key",
        "secret_key": "test-secret",
    }
    with pytest.raises(ImportError) as exc_info:
        S3Destination(**init_kwargs)

    # The message names the missing package and the install extra.
    message = str(exc_info.value)
    assert "boto3" in message
    assert "xparse-client[s3]" in message
540
+
541
+
542
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_list_files_pagination_error(mock_get_boto3):
    """A paginator failure while listing files raises SourceError.

    ``paginate`` is made to raise, and the resulting ``SourceError`` message
    is checked for either the "列出文件" text or the word "list".
    """
    # SourceError comes from the module-level import; no local re-import needed.
    mock_client, _, _ = _setup_s3_mock(mock_get_boto3)
    mock_client.head_bucket.return_value = {}

    # Make the paginator fail as soon as paginate() is called.
    mock_paginator = MagicMock()
    mock_paginator.paginate.side_effect = Exception("Pagination error")
    mock_client.get_paginator.return_value = mock_paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        bucket="test-bucket",
        access_key="test-key",
        secret_key="test-secret"
    )

    with pytest.raises(SourceError) as exc_info:
        # list() forces full consumption of the result.
        list(source.list_files())

    message = str(exc_info.value)
    assert "列出文件" in message or "list" in message.lower()
566
+
567
+
568
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_read_file_get_object_error(mock_get_boto3):
    """``get_object`` raising the client's NoSuchKey surfaces as SourceError.

    The ``SourceError`` message is checked for either the "不存在" text or
    the word "exist".
    """
    # SourceError comes from the module-level import; no local re-import needed.
    mock_client, _, _ = _setup_s3_mock(mock_get_boto3)
    mock_client.head_bucket.return_value = {}

    # A real exception class is required so it can be raised and caught.
    class NoSuchKeyError(Exception):
        pass

    # Expose it as client.exceptions.NoSuchKey; ``exceptions`` is already an
    # auto-created MagicMock attribute, so only NoSuchKey needs to be set.
    mock_client.exceptions.NoSuchKey = NoSuchKeyError

    # get_object raises that NoSuchKey exception.
    mock_client.get_object.side_effect = NoSuchKeyError("The specified key does not exist.")

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        bucket="test-bucket",
        access_key="test-key",
        secret_key="test-secret"
    )

    with pytest.raises(SourceError) as exc_info:
        source.read_file("test.txt")

    message = str(exc_info.value)
    assert "不存在" in message or "exist" in message.lower()