xparse-client 0.2.11__py3-none-any.whl → 0.3.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- example/1_basic_api_usage.py +198 -0
- example/2_async_job.py +210 -0
- example/3_local_workflow.py +300 -0
- example/4_advanced_workflow.py +327 -0
- example/README.md +128 -0
- example/config_example.json +95 -0
- tests/conftest.py +310 -0
- tests/unit/__init__.py +1 -0
- tests/unit/api/__init__.py +1 -0
- tests/unit/api/test_extract.py +232 -0
- tests/unit/api/test_local.py +231 -0
- tests/unit/api/test_parse.py +374 -0
- tests/unit/api/test_pipeline.py +369 -0
- tests/unit/api/test_workflows.py +108 -0
- tests/unit/connectors/test_ftp.py +525 -0
- tests/unit/connectors/test_local_connectors.py +324 -0
- tests/unit/connectors/test_milvus.py +368 -0
- tests/unit/connectors/test_qdrant.py +399 -0
- tests/unit/connectors/test_s3.py +598 -0
- tests/unit/connectors/test_smb.py +442 -0
- tests/unit/connectors/test_utils.py +335 -0
- tests/unit/models/test_local.py +54 -0
- tests/unit/models/test_pipeline_stages.py +144 -0
- tests/unit/models/test_workflows.py +55 -0
- tests/unit/test_base.py +437 -0
- tests/unit/test_client.py +110 -0
- tests/unit/test_config.py +160 -0
- tests/unit/test_exceptions.py +182 -0
- tests/unit/test_http.py +562 -0
- xparse_client/__init__.py +111 -20
- xparse_client/_base.py +179 -0
- xparse_client/_client.py +218 -0
- xparse_client/_config.py +221 -0
- xparse_client/_http.py +350 -0
- xparse_client/api/__init__.py +14 -0
- xparse_client/api/extract.py +109 -0
- xparse_client/api/local.py +215 -0
- xparse_client/api/parse.py +209 -0
- xparse_client/api/pipeline.py +134 -0
- xparse_client/api/workflows.py +204 -0
- xparse_client/connectors/__init__.py +45 -0
- xparse_client/connectors/_utils.py +138 -0
- xparse_client/connectors/destinations/__init__.py +45 -0
- xparse_client/connectors/destinations/base.py +116 -0
- xparse_client/connectors/destinations/local.py +91 -0
- xparse_client/connectors/destinations/milvus.py +229 -0
- xparse_client/connectors/destinations/qdrant.py +238 -0
- xparse_client/connectors/destinations/s3.py +163 -0
- xparse_client/connectors/sources/__init__.py +45 -0
- xparse_client/connectors/sources/base.py +74 -0
- xparse_client/connectors/sources/ftp.py +278 -0
- xparse_client/connectors/sources/local.py +176 -0
- xparse_client/connectors/sources/s3.py +232 -0
- xparse_client/connectors/sources/smb.py +259 -0
- xparse_client/exceptions.py +398 -0
- xparse_client/models/__init__.py +60 -0
- xparse_client/models/chunk.py +39 -0
- xparse_client/models/embed.py +62 -0
- xparse_client/models/extract.py +41 -0
- xparse_client/models/local.py +38 -0
- xparse_client/models/parse.py +136 -0
- xparse_client/models/pipeline.py +134 -0
- xparse_client/models/workflows.py +74 -0
- xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
- xparse_client-0.3.0b3.dist-info/RECORD +68 -0
- {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
- {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
- {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +1 -0
- example/run_pipeline.py +0 -506
- example/run_pipeline_test.py +0 -458
- xparse_client/pipeline/__init__.py +0 -3
- xparse_client/pipeline/config.py +0 -129
- xparse_client/pipeline/destinations.py +0 -487
- xparse_client/pipeline/pipeline.py +0 -622
- xparse_client/pipeline/sources.py +0 -585
- xparse_client-0.2.11.dist-info/METADATA +0 -1050
- xparse_client-0.2.11.dist-info/RECORD +0 -13
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
"""S3Source 和 S3Destination 测试
|
|
2
|
+
|
|
3
|
+
测试 S3/MinIO connectors。使用 mock 模式测试,无需真实 S3 服务器或 moto 库。
|
|
4
|
+
|
|
5
|
+
运行方式:
|
|
6
|
+
pytest tests/unit/connectors/test_s3.py -v
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from unittest.mock import MagicMock, patch
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
from xparse_client.connectors import S3Destination, S3Source
|
|
15
|
+
from xparse_client.exceptions import DestinationError, SourceError
|
|
16
|
+
|
|
17
|
+
# ============================================================================
|
|
18
|
+
# Mock 辅助函数
|
|
19
|
+
# ============================================================================
|
|
20
|
+
|
|
21
|
+
def _setup_s3_mock(mock_get_boto3):
|
|
22
|
+
"""设置 S3 mock 的辅助函数"""
|
|
23
|
+
mock_boto3 = MagicMock()
|
|
24
|
+
mock_config_class = MagicMock()
|
|
25
|
+
mock_client = MagicMock()
|
|
26
|
+
|
|
27
|
+
# boto3.client() 返回 mock client
|
|
28
|
+
mock_boto3.client.return_value = mock_client
|
|
29
|
+
|
|
30
|
+
# _get_boto3() 返回 (boto3, Config)
|
|
31
|
+
mock_get_boto3.return_value = (mock_boto3, mock_config_class)
|
|
32
|
+
|
|
33
|
+
return mock_client, mock_boto3, mock_config_class
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ============================================================================
|
|
37
|
+
# S3Source 测试
|
|
38
|
+
# ============================================================================
|
|
39
|
+
|
|
40
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_initialization(mock_get_boto3):
    """S3Source keeps its constructor arguments and probes the bucket once."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # The bucket probe succeeds.
    s3_client.head_bucket.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
        "prefix": "documents/",
        "region": "us-west-2",
    }
    source = S3Source(**init_kwargs)

    assert source.endpoint == "https://s3.amazonaws.com"
    assert source.bucket == "test-bucket"
    assert source.prefix == "documents/"
    # Options that were not passed keep their defaults.
    assert source.pattern is None
    assert source.recursive is False

    # Construction must have checked the bucket exactly once.
    s3_client.head_bucket.assert_called_once_with(Bucket="test-bucket")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_list_files_basic(mock_get_boto3):
    """Non-recursive listing returns file keys and drops directory keys."""
    s3_client, _, _ = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # A single result page: two objects plus one directory placeholder key.
    page = {
        "Contents": [
            {"Key": key} for key in ("file1.pdf", "file2.docx", "subdir/")
        ]
    }
    paginator = MagicMock()
    paginator.paginate.return_value = [page]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        recursive=False,
    )
    listed = source.list_files()

    # The "subdir/" entry is expected to be filtered out.
    assert len(listed) == 2
    assert 'file1.pdf' in listed
    assert 'file2.docx' in listed
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_list_files_with_prefix(mock_get_boto3):
    """Listing forwards the configured prefix to the paginator."""
    s3_client, _, _ = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    page = {
        "Contents": [
            {"Key": "documents/doc1.pdf"},
            {"Key": "documents/doc2.docx"},
        ]
    }
    paginator = MagicMock()
    paginator.paginate.return_value = [page]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        prefix="documents/",
    )
    listed = source.list_files()

    # The paginator must have been asked for the configured prefix.
    _, paginate_kwargs = paginator.paginate.call_args
    assert paginate_kwargs["Prefix"] == "documents/"

    assert len(listed) == 2
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_list_files_with_pattern(mock_get_boto3):
    """Listing with a pattern returns only the keys that match it."""
    s3_client, _, _ = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # Three objects with different extensions; only the PDF should survive.
    page = {
        "Contents": [
            {"Key": key} for key in ("file1.pdf", "file2.docx", "file3.txt")
        ]
    }
    paginator = MagicMock()
    paginator.paginate.return_value = [page]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        pattern=["*.pdf"],
    )
    listed = source.list_files()

    assert len(listed) == 1
    assert 'file1.pdf' in listed
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_list_files_recursive(mock_get_boto3):
    """Recursive listing returns nested keys and sends no Delimiter."""
    s3_client, _, _ = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # Objects at three different nesting depths.
    nested_keys = (
        "file1.pdf",
        "subdir/file2.pdf",
        "subdir/subdir2/file3.pdf",
    )
    paginator = MagicMock()
    paginator.paginate.return_value = [
        {"Contents": [{"Key": key} for key in nested_keys]}
    ]
    s3_client.get_paginator.return_value = paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        recursive=True,
    )
    listed = source.list_files()

    # Recursive mode must not pass a Delimiter to the paginator.
    _, paginate_kwargs = paginator.paginate.call_args
    assert "Delimiter" not in paginate_kwargs

    assert len(listed) == 3
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_read_file(mock_get_boto3):
    """read_file returns the object bytes plus a data-source description."""
    s3_client, _, _ = _setup_s3_mock(mock_get_boto3)
    s3_client.head_bucket.return_value = {}

    # Fake get_object response: a readable body and the HTTP headers.
    payload = b'S3 file content'
    body = MagicMock()
    body.read.return_value = payload
    http_headers = {
        "etag": '"abc123"',
        "last-modified": "Mon, 28 Jan 2024 12:00:00 GMT",
        "server": "AmazonS3",
    }
    s3_client.get_object.return_value = {
        "Body": body,
        "ResponseMetadata": {"HTTPHeaders": http_headers},
    }

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )
    file_bytes, data_source = source.read_file('test.pdf')

    assert file_bytes == payload
    assert data_source['url'] == 's3://test-bucket/test.pdf'
    # The version is the etag with its surrounding quotes removed.
    assert data_source['version'] == 'abc123'
    assert data_source['record_locator']['protocol'] == 's3'
    assert 'date_modified' in data_source
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_read_file_not_found(mock_get_boto3):
    """read_file raises SourceError when the client reports NoSuchKey."""
    mock_client, _, _ = _setup_s3_mock(mock_get_boto3)
    mock_client.head_bucket.return_value = {}

    # Give the fake client a real exception class for ``exceptions.NoSuchKey``
    # (a MagicMock attribute cannot be raised), then make get_object raise it.
    mock_client.exceptions.NoSuchKey = type('NoSuchKey', (Exception,), {})
    mock_client.get_object.side_effect = mock_client.exceptions.NoSuchKey("Not found")

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket"
    )

    with pytest.raises(SourceError) as exc_info:
        source.read_file('nonexistent.pdf')

    assert "S3 对象不存在" in str(exc_info.value)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_connection_error(mock_get_boto3):
    """A failing bucket probe makes the S3Source constructor raise SourceError."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # The bucket probe fails.
    s3_client.head_bucket.side_effect = Exception("Connection refused")

    init_kwargs = {
        "endpoint": "https://invalid-endpoint.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
    }
    with pytest.raises(SourceError) as exc_info:
        S3Source(**init_kwargs)

    message = str(exc_info.value)
    assert "S3 连接失败" in message
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_context_manager(mock_get_boto3):
    """S3Source can be used in a ``with`` statement and yields an object."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
    }
    with S3Source(**init_kwargs) as source:
        assert source is not None
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_repr(mock_get_boto3):
    """repr(S3Source) shows the endpoint and the bucket."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
    }
    source = S3Source(**init_kwargs)

    expected = "<S3Source endpoint=https://s3.amazonaws.com bucket=test-bucket>"
    assert repr(source) == expected
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# ============================================================================
|
|
321
|
+
# S3Destination 测试
|
|
322
|
+
# ============================================================================
|
|
323
|
+
|
|
324
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_initialization(mock_get_boto3):
    """S3Destination stores its settings and performs a write probe."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # Bucket probe, test upload and cleanup all succeed.
    s3_client.head_bucket.return_value = {}
    s3_client.put_object.return_value = {}
    s3_client.delete_object.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
        "prefix": "output/",
    }
    dest = S3Destination(**init_kwargs)

    assert dest.endpoint == "https://s3.amazonaws.com"
    assert dest.bucket == "test-bucket"
    # Note: the stored prefix has its slash stripped.
    assert dest.prefix == "output"

    # Construction must have attempted the test upload.
    assert s3_client.put_object.called
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_write(mock_get_boto3):
    """write() uploads the elements as JSON under a key derived from the filename."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}
    s3_client.put_object.return_value = {}
    s3_client.delete_object.return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    elements = [
        {"element_id": "1", "text": "test1"},
        {"element_id": "2", "text": "test2"},
    ]
    outcome = dest.write(elements, {"filename": "test.pdf"})

    assert outcome is True

    # At least two uploads: the constructor's write probe plus the real write.
    assert s3_client.put_object.call_count >= 2

    # Inspect the keyword arguments of the most recent upload.
    _, upload_kwargs = s3_client.put_object.call_args_list[-1]
    assert upload_kwargs["Bucket"] == "test-bucket"
    assert upload_kwargs["Key"] == "test.json"
    assert upload_kwargs["ContentType"] == "application/json"

    # The uploaded body must decode back to the elements that were written.
    stored = json.loads(upload_kwargs["Body"].decode("utf-8"))
    assert len(stored) == 2
    assert stored[0]["element_id"] == "1"
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_write_with_prefix(mock_get_boto3):
    """write() places the object key underneath the configured prefix."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}
    s3_client.put_object.return_value = {}
    s3_client.delete_object.return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
        prefix="output/",
    )

    outcome = dest.write([{"element_id": "1"}], {"filename": "test.pdf"})

    assert outcome is True

    # The key of the most recent upload must include the prefix.
    _, upload_kwargs = s3_client.put_object.call_args_list[-1]
    assert upload_kwargs["Key"] == "output/test.json"
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_write_error(mock_get_boto3):
    """A failing upload during write() surfaces as DestinationError."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}

    # First put_object (the constructor's write probe) succeeds; the second fails.
    upload_failure = Exception("Write failed")
    s3_client.put_object.side_effect = [{}, upload_failure]
    s3_client.delete_object.return_value = {}

    dest = S3Destination(
        endpoint="https://s3.amazonaws.com",
        access_key="test-key",
        secret_key="test-secret",
        bucket="test-bucket",
    )

    elements = [{"element_id": "1"}]
    file_info = {"filename": "test.pdf"}

    with pytest.raises(DestinationError) as exc_info:
        dest.write(elements, file_info)

    message = str(exc_info.value)
    assert "写入 S3 失败" in message
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_connection_error(mock_get_boto3):
    """A failing bucket probe makes the S3Destination constructor raise."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]

    # The bucket probe fails.
    s3_client.head_bucket.side_effect = Exception("Bucket not found")

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "nonexistent-bucket",
    }
    with pytest.raises(DestinationError) as exc_info:
        S3Destination(**init_kwargs)

    message = str(exc_info.value)
    assert "S3 连接或写入测试失败" in message
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_context_manager(mock_get_boto3):
    """S3Destination can be used in a ``with`` statement and yields an object."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}
    s3_client.put_object.return_value = {}
    s3_client.delete_object.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
    }
    with S3Destination(**init_kwargs) as dest:
        assert dest is not None
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_repr(mock_get_boto3):
    """repr(S3Destination) shows the endpoint and the bucket."""
    s3_client = _setup_s3_mock(mock_get_boto3)[0]
    s3_client.head_bucket.return_value = {}
    s3_client.put_object.return_value = {}
    s3_client.delete_object.return_value = {}

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "access_key": "test-key",
        "secret_key": "test-secret",
        "bucket": "test-bucket",
    }
    dest = S3Destination(**init_kwargs)

    expected = "<S3Destination endpoint=https://s3.amazonaws.com bucket=test-bucket>"
    assert repr(dest) == expected
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
# ============================================================================
|
|
499
|
+
# 懒加载失败测试
|
|
500
|
+
# ============================================================================
|
|
501
|
+
|
|
502
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_import_error(mock_get_boto3):
    """An ImportError from the boto3 loader propagates out of S3Source()."""
    # Simulate the loader failing because boto3 is not installed.
    install_hint = "使用 S3Source 需要安装 boto3: pip install xparse-client[s3]"
    mock_get_boto3.side_effect = ImportError(install_hint)

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "bucket": "test-bucket",
        "access_key": "test-key",
        "secret_key": "test-secret",
    }
    with pytest.raises(ImportError) as exc_info:
        S3Source(**init_kwargs)

    # The message must name the missing package and the extra to install.
    message = str(exc_info.value)
    assert "boto3" in message
    assert "xparse-client[s3]" in message
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@patch('xparse_client.connectors.destinations.s3._get_boto3')
def test_s3_destination_import_error(mock_get_boto3):
    """An ImportError from the boto3 loader propagates out of S3Destination()."""
    # Simulate the loader failing because boto3 is not installed.
    install_hint = "使用 S3Destination 需要安装 boto3: pip install xparse-client[s3]"
    mock_get_boto3.side_effect = ImportError(install_hint)

    init_kwargs = {
        "endpoint": "https://s3.amazonaws.com",
        "bucket": "test-bucket",
        "access_key": "test-key",
        "secret_key": "test-secret",
    }
    with pytest.raises(ImportError) as exc_info:
        S3Destination(**init_kwargs)

    # The message must name the missing package and the extra to install.
    message = str(exc_info.value)
    assert "boto3" in message
    assert "xparse-client[s3]" in message
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_list_files_pagination_error(mock_get_boto3):
    """list_files raises SourceError when the paginator fails."""
    # SourceError comes from the module-level import; no local re-import needed.
    mock_client, _, _ = _setup_s3_mock(mock_get_boto3)
    mock_client.head_bucket.return_value = {}

    # The paginator raises as soon as paginate() is called.
    mock_paginator = MagicMock()
    mock_paginator.paginate.side_effect = Exception("Pagination error")
    mock_client.get_paginator.return_value = mock_paginator

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        bucket="test-bucket",
        access_key="test-key",
        secret_key="test-secret"
    )

    with pytest.raises(SourceError) as exc_info:
        # list() forces evaluation in case list_files() returns a lazy iterable.
        list(source.list_files())

    # Accept either a Chinese or an English error message.
    assert "列出文件" in str(exc_info.value) or "list" in str(exc_info.value).lower()
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
@patch('xparse_client.connectors.sources.s3._get_boto3')
def test_s3_source_read_file_get_object_error(mock_get_boto3):
    """read_file raises SourceError when get_object raises NoSuchKey."""
    # SourceError comes from the module-level import; no local re-import needed.
    mock_client, _, _ = _setup_s3_mock(mock_get_boto3)
    mock_client.head_bucket.return_value = {}

    # A real exception class standing in for the client's NoSuchKey error.
    class NoSuchKeyError(Exception):
        pass

    # Expose it as client.exceptions.NoSuchKey.
    mock_exceptions = MagicMock()
    mock_exceptions.NoSuchKey = NoSuchKeyError
    mock_client.exceptions = mock_exceptions

    # get_object raises the NoSuchKey stand-in.
    mock_client.get_object.side_effect = NoSuchKeyError("The specified key does not exist.")

    source = S3Source(
        endpoint="https://s3.amazonaws.com",
        bucket="test-bucket",
        access_key="test-key",
        secret_key="test-secret"
    )

    with pytest.raises(SourceError) as exc_info:
        source.read_file("test.txt")

    # Accept either a Chinese or an English error message.
    assert "不存在" in str(exc_info.value) or "exist" in str(exc_info.value).lower()
|