xparse-client 0.2.11__py3-none-any.whl → 0.3.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- example/1_basic_api_usage.py +198 -0
- example/2_async_job.py +210 -0
- example/3_local_workflow.py +300 -0
- example/4_advanced_workflow.py +327 -0
- example/README.md +128 -0
- example/config_example.json +95 -0
- tests/conftest.py +310 -0
- tests/unit/__init__.py +1 -0
- tests/unit/api/__init__.py +1 -0
- tests/unit/api/test_extract.py +232 -0
- tests/unit/api/test_local.py +231 -0
- tests/unit/api/test_parse.py +374 -0
- tests/unit/api/test_pipeline.py +369 -0
- tests/unit/api/test_workflows.py +108 -0
- tests/unit/connectors/test_ftp.py +525 -0
- tests/unit/connectors/test_local_connectors.py +324 -0
- tests/unit/connectors/test_milvus.py +368 -0
- tests/unit/connectors/test_qdrant.py +399 -0
- tests/unit/connectors/test_s3.py +598 -0
- tests/unit/connectors/test_smb.py +442 -0
- tests/unit/connectors/test_utils.py +335 -0
- tests/unit/models/test_local.py +54 -0
- tests/unit/models/test_pipeline_stages.py +144 -0
- tests/unit/models/test_workflows.py +55 -0
- tests/unit/test_base.py +437 -0
- tests/unit/test_client.py +110 -0
- tests/unit/test_config.py +160 -0
- tests/unit/test_exceptions.py +182 -0
- tests/unit/test_http.py +562 -0
- xparse_client/__init__.py +111 -20
- xparse_client/_base.py +179 -0
- xparse_client/_client.py +218 -0
- xparse_client/_config.py +221 -0
- xparse_client/_http.py +350 -0
- xparse_client/api/__init__.py +14 -0
- xparse_client/api/extract.py +109 -0
- xparse_client/api/local.py +215 -0
- xparse_client/api/parse.py +209 -0
- xparse_client/api/pipeline.py +134 -0
- xparse_client/api/workflows.py +204 -0
- xparse_client/connectors/__init__.py +45 -0
- xparse_client/connectors/_utils.py +138 -0
- xparse_client/connectors/destinations/__init__.py +45 -0
- xparse_client/connectors/destinations/base.py +116 -0
- xparse_client/connectors/destinations/local.py +91 -0
- xparse_client/connectors/destinations/milvus.py +229 -0
- xparse_client/connectors/destinations/qdrant.py +238 -0
- xparse_client/connectors/destinations/s3.py +163 -0
- xparse_client/connectors/sources/__init__.py +45 -0
- xparse_client/connectors/sources/base.py +74 -0
- xparse_client/connectors/sources/ftp.py +278 -0
- xparse_client/connectors/sources/local.py +176 -0
- xparse_client/connectors/sources/s3.py +232 -0
- xparse_client/connectors/sources/smb.py +259 -0
- xparse_client/exceptions.py +398 -0
- xparse_client/models/__init__.py +60 -0
- xparse_client/models/chunk.py +39 -0
- xparse_client/models/embed.py +62 -0
- xparse_client/models/extract.py +41 -0
- xparse_client/models/local.py +38 -0
- xparse_client/models/parse.py +136 -0
- xparse_client/models/pipeline.py +134 -0
- xparse_client/models/workflows.py +74 -0
- xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
- xparse_client-0.3.0b3.dist-info/RECORD +68 -0
- {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
- {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
- {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +1 -0
- example/run_pipeline.py +0 -506
- example/run_pipeline_test.py +0 -458
- xparse_client/pipeline/__init__.py +0 -3
- xparse_client/pipeline/config.py +0 -129
- xparse_client/pipeline/destinations.py +0 -487
- xparse_client/pipeline/pipeline.py +0 -622
- xparse_client/pipeline/sources.py +0 -585
- xparse_client-0.2.11.dist-info/METADATA +0 -1050
- xparse_client-0.2.11.dist-info/RECORD +0 -13
tests/conftest.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""pytest 配置和共享 fixtures"""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@pytest.fixture
def app_id() -> str:
    """Provide the placeholder ``app_id`` value used by the tests."""
    test_app_id = "test-app-id"
    return test_app_id
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture
def secret_code() -> str:
    """Provide the placeholder ``secret_code`` value used by the tests."""
    test_secret = "test-secret-code"
    return test_secret
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.fixture
def server_url() -> str:
    """Provide the base server URL that the tests point the client at."""
    base_url = "https://api.test.com"
    return base_url
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Provide a short byte string that stands in for PDF file content."""
    fake_pdf = b"%PDF-1.4 sample pdf content"
    return fake_pdf
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
def sample_response_data() -> dict:
    """Provide a minimal API response envelope holding a single text element."""
    element = {
        "element_id": "elem_001",
        "type": "text",
        "text": "这是一段测试文本",
        "metadata": {"page_number": 1},
    }
    payload = {
        "elements": [element],
        "success_count": 1,
        "record_id": "rec_123",
    }
    return {"code": 200, "data": payload}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ============================================================================
|
|
51
|
+
# 新增 Fixtures(Phase 1 测试补充)
|
|
52
|
+
# ============================================================================
|
|
53
|
+
|
|
54
|
+
@pytest.fixture
def parse_success_response() -> dict:
    """Provide a successful Parse API response, modelled on the real format.

    The payload contains one ``Header`` element with full metadata, plus the
    job id, success count, message and request id of the response envelope.
    """
    # Every timestamp field in this sample carries the same value.
    timestamp = "2026-01-27T09:47:51Z"

    data_source = {
        "date_created": timestamp,
        "date_modified": timestamp,
        "date_processed": timestamp,
        "record_locator": {
            "protocol": "file",
            "remote_file_path": "temp/test.pdf",
        },
        "url": "file://temp/test.pdf",
        "version": timestamp,
    }
    metadata = {
        "category_depth": -1,
        "coordinates": [0.1277, 0.0909, 0.3025, 0.0909, 0.3025, 0.1171, 0.1277, 0.1176],
        "data_source": data_source,
        "filename": "test.pdf",
        "filetype": "application/pdf",
        "last_modified": timestamp,
        "page_height": 1683,
        "page_number": 1,
        "page_width": 1190,
    }
    header_element = {
        "element_id": "d0c8f3bf754cf4b071b62402d6932ddcb10f3c7e",
        "type": "Header",
        "metadata": metadata,
        "text": "测试标题",
    }
    payload = {
        "elements": [header_element],
        "job_id": "5f9f10b8b4c8467882f67ae16bd48746",
        "success_count": 1,
    }
    return {
        "code": 200,
        "data": payload,
        "message": "success",
        "x_request_id": "e4193bf242b8bb095295a6e7df0cacb1",
    }
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@pytest.fixture
def extract_success_response() -> dict:
    """Provide a successful Extract API response, modelled on the real format.

    The result holds one extracted field together with its citation and the
    per-page information, wrapped in the usual response envelope.
    """
    # The extracted value is repeated in the schema, the citation and its region.
    extracted_name = "陈君曜"

    citation = {
        "bounding_regions": [
            {
                "page_number": 1,
                "position": [175, 86, 245, 86, 245, 112, 175, 112],
                "text": extracted_name,
            }
        ],
        "value": extracted_name,
    }
    page_info = {
        "page_number": 1,
        "image_id": "",
        "height": 566,
        "width": 800,
        "angle": 0,
        "status": "Success",
        "durations": 0,
    }
    result = {
        "success_count": 1,
        "extracted_schema": {"姓名": extracted_name},
        "citations": {"姓名": citation},
        "pages": [page_info],
    }
    payload = {
        "job_id": "f76c31eedc124580a6daccba25e8ae27",
        "result": result,
        "success_count": 1,
    }
    return {
        "code": 200,
        "data": payload,
        "message": "success",
        "x_request_id": "eaf843de96084fd635b901394e500b40",
    }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.fixture
def business_error_response() -> dict:
    """Provide a business-error body: sent with HTTP 200 but a non-200 ``code``."""
    return dict(
        code=400,
        message="参数错误:schema 格式不正确",
        data=None,
        x_request_id="abc123",
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@pytest.fixture
def pipeline_parse_chunk_embed_response() -> dict:
    """Provide a Pipeline API response for parse + chunk + embed.

    Besides the final embedded element, the payload carries the intermediate
    results of the parse and chunk stages and a ``stats`` summary.
    """
    timestamp = "2026-01-28T06:44:39Z"
    chunk_text = "测试文本内容"

    # 1024-dimensional vector: three non-zero leading values, zero-padded.
    embedding = [0.01, -0.105, 0.025]
    embedding.extend([0.0] * 1021)

    data_source = {
        "date_created": timestamp,
        "date_modified": timestamp,
        "date_processed": timestamp,
        "record_locator": {
            "protocol": "file",
            "remote_file_path": "temp/test.pdf",
        },
        "url": "file://temp/test.pdf",
        "version": timestamp,
    }
    metadata = {
        "coordinates": [0.05, 0.02, 0.94, 0.02, 0.94, 0.21, 0.05, 0.21],
        "data_source": data_source,
        "filename": "test.pdf",
        "filetype": "application/pdf",
        "is_continuation": False,
        "page_height": 821,
        "page_number": 1,
        "page_width": 1270,
    }
    embedded_element = {
        "element_id": "elem_001",
        "type": "CompositeElement",
        "metadata": metadata,
        "text": chunk_text,
        "embeddings": embedding,
    }

    parse_stage = {
        "stage": "parse",
        "elements": [
            {
                "element_id": "elem_parse_001",
                "type": "Header",
                "text": "原始标题",
                "metadata": {},
            }
        ],
    }
    chunk_stage = {
        "stage": "chunk",
        "elements": [
            {
                "element_id": "elem_001",
                "type": "CompositeElement",
                "text": chunk_text,
                "metadata": {},
            }
        ],
    }

    stats = {
        "original_elements": 15,
        "chunked_elements": 3,
        "embedded_elements": 3,
        "stages": [
            {"type": "parse", "config": {"provider": "textin"}},
            {"type": "chunk", "config": {"strategy": "basic"}},
            {"type": "embed", "config": {"provider": "qwen", "model_name": "text-embedding-v4"}},
        ],
        "record_id": "rec_123",
        "success_count": 1,
    }

    payload = {
        "elements": [embedded_element],
        "intermediate_results": [parse_stage, chunk_stage],
        "job_id": "job_123",
        "stats": stats,
    }
    return {
        "code": 200,
        "data": payload,
        "message": "success",
        "x_request_id": "req_123",
    }
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@pytest.fixture
def pipeline_parse_extract_response() -> dict:
    """Provide a Pipeline API response for parse + extract.

    The payload carries the extraction result, the intermediate result of the
    parse stage and a ``stats`` summary.
    """
    # The extracted value is repeated in the schema, the citation and its region.
    buyer = "杭州百世网络技术有限公司"

    citation = {
        "bounding_regions": [
            {
                "page_number": 1,
                "position": [224, 177, 541, 177, 541, 197, 224, 197],
                "text": buyer,
            }
        ],
        "value": buyer,
    }
    extract_result = {
        "success_count": 1,
        "extracted_schema": {"购买方": buyer},
        "citations": {"购买方": citation},
        "pages": [{"page_number": 1, "height": 821, "width": 1270}],
    }

    parse_stage = {
        "stage": "parse",
        "elements": [
            {
                "element_id": "elem_001",
                "type": "Image",
                "text": "",
                "metadata": {},
            }
        ],
    }

    stats = {
        "original_elements": 15,
        "stages": [
            {"type": "parse", "config": {"provider": "textin"}},
            {"type": "extract", "config": {"schema": {}}},
        ],
        "record_id": "rec_456",
        "success_count": 1,
    }

    payload = {
        "extract_result": extract_result,
        "intermediate_results": [parse_stage],
        "job_id": "job_456",
        "stats": stats,
    }
    return {
        "code": 200,
        "data": payload,
        "message": "success",
        "x_request_id": "req_456",
    }
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# ============================================================================
|
|
280
|
+
# 可选依赖检查 Helper
|
|
281
|
+
# ============================================================================
|
|
282
|
+
|
|
283
|
+
# Probe each optional dependency by importing it; every ``has_*`` flag records
# whether the corresponding import succeeded.
try:
    import boto3  # noqa: F401
except ImportError:
    has_boto3 = False
else:
    has_boto3 = True

try:
    import pymilvus  # noqa: F401
except ImportError:
    has_pymilvus = False
else:
    has_pymilvus = True

try:
    import qdrant_client  # noqa: F401
except ImportError:
    has_qdrant = False
else:
    has_qdrant = True

try:
    from smb.SMBConnection import SMBConnection  # noqa: F401
except ImportError:
    has_smb = False
else:
    has_smb = True
|
tests/unit/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""单元测试模块"""
|
tests/unit/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API 测试模块"""
|
tests/unit/api/test_extract.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Extract API 测试
|
|
2
|
+
|
|
3
|
+
测试 Extract API 的所有功能,包括:
|
|
4
|
+
- 基础提取功能
|
|
5
|
+
- parse_config 支持
|
|
6
|
+
- extract_config 参数验证
|
|
7
|
+
- 错误处理
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
import pytest
|
|
12
|
+
import respx
|
|
13
|
+
|
|
14
|
+
from xparse_client import XParseClient
|
|
15
|
+
from xparse_client.exceptions import ServerError, ValidationError
|
|
16
|
+
from xparse_client.models import ExtractConfig, ParseConfig
|
|
17
|
+
|
|
18
|
+
# ============================================================================
|
|
19
|
+
# 基础功能测试
|
|
20
|
+
# ============================================================================
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_extract_api_exists():
    """Check that a new client exposes an ``extract`` attribute that is not None."""
    xparse = XParseClient(app_id="test", secret_code="test")

    assert hasattr(xparse, "extract")
    extract_api = xparse.extract
    assert extract_api is not None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@respx.mock
def test_extract_basic(server_url, extract_success_response):
    """Run a basic extraction against a mocked endpoint and check the result.

    Verifies the returned job id and result, and that the outgoing request
    body mentions ``extract_config``.
    """
    # Stub the synchronous extract endpoint with the canned success response.
    endpoint = f"{server_url}/api/xparse/extract/sync"
    canned = httpx.Response(200, json=extract_success_response)
    respx.post(endpoint).mock(return_value=canned)

    xparse = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    # JSON schema asking for one required string field.
    schema = {
        "type": "object",
        "properties": {
            "姓名": {"type": "string"}
        },
        "required": ["姓名"]
    }
    config = ExtractConfig(schema_=schema)

    outcome = xparse.extract.extract(
        file=b"test pdf content",
        filename="test.pdf",
        extract_config=config
    )

    # The response must be surfaced with its job id and result payload.
    assert outcome is not None
    assert outcome.job_id == "f76c31eedc124580a6daccba25e8ae27"
    assert outcome.result is not None
    assert "extracted_schema" in outcome.result

    # The request sent to the server must carry the extract configuration.
    sent = respx.calls.last.request
    assert b"extract_config" in sent.content
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@respx.mock
def test_extract_with_parse_config(server_url, extract_success_response):
    """Check that an explicit ``parse_config`` is included in the request body."""
    # Stub the synchronous extract endpoint with the canned success response.
    endpoint = f"{server_url}/api/xparse/extract/sync"
    canned = httpx.Response(200, json=extract_success_response)
    respx.post(endpoint).mock(return_value=canned)

    xparse = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    # Both configurations are passed; the parse provider is what we look for.
    parsing = ParseConfig(provider="mineru")
    schema = {
        "type": "object",
        "properties": {"姓名": {"type": "string"}}
    }
    extraction = ExtractConfig(schema_=schema)

    outcome = xparse.extract.extract(
        file=b"test",
        filename="test.pdf",
        parse_config=parsing,
        extract_config=extraction
    )

    assert outcome is not None

    # The request body must name parse_config and the chosen provider.
    sent = respx.calls.last.request
    assert b"parse_config" in sent.content
    assert b"mineru" in sent.content
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_extract_missing_extract_config():
    """Check that passing ``extract_config=None`` raises ``ValueError``."""
    xparse = XParseClient(app_id="test", secret_code="test")

    # Omitting the extract configuration is expected to be rejected.
    with pytest.raises(ValueError) as exc_info:
        xparse.extract.extract(
            file=b"test",
            filename="test.pdf",
            extract_config=None
        )

    error_text = str(exc_info.value)
    assert "extract_config is required" in error_text
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ============================================================================
|
|
123
|
+
# 错误处理测试
|
|
124
|
+
# ============================================================================
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@respx.mock
def test_extract_invalid_schema_error(server_url, business_error_response):
    """Check that a business-error body sent with HTTP 200 raises ``ValidationError``."""
    # The mocked endpoint answers HTTP 200 with the business-error body.
    endpoint = f"{server_url}/api/xparse/extract/sync"
    canned = httpx.Response(200, json=business_error_response)
    respx.post(endpoint).mock(return_value=canned)

    xparse = XParseClient(app_id="test", secret_code="test", server_url=server_url)
    bad_config = ExtractConfig(schema_={"invalid": "schema"})

    with pytest.raises(ValidationError) as exc_info:
        xparse.extract.extract(
            file=b"test",
            filename="test.pdf",
            extract_config=bad_config
        )

    # The server-provided message must be preserved in the exception text.
    error_text = str(exc_info.value)
    assert "参数错误" in error_text
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@respx.mock
def test_extract_server_error(server_url):
    """Check that an HTTP 500 reply from the endpoint raises ``ServerError``."""
    # The mocked endpoint answers with HTTP 500 and an error message.
    endpoint = f"{server_url}/api/xparse/extract/sync"
    canned = httpx.Response(500, json={"message": "服务器内部错误"})
    respx.post(endpoint).mock(return_value=canned)

    xparse = XParseClient(app_id="test", secret_code="test", server_url=server_url)
    schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}}
    }
    config = ExtractConfig(schema_=schema)

    with pytest.raises(ServerError) as exc_info:
        xparse.extract.extract(
            file=b"test",
            filename="test.pdf",
            extract_config=config
        )

    error_text = str(exc_info.value)
    assert "服务器" in error_text
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ============================================================================
|
|
177
|
+
# ExtractConfig 参数测试
|
|
178
|
+
# ============================================================================
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@respx.mock
def test_extract_with_generate_citations(server_url, extract_success_response):
    """Check that ``generate_citations`` is included in the request body."""
    endpoint = f"{server_url}/api/xparse/extract/sync"
    canned = httpx.Response(200, json=extract_success_response)
    respx.post(endpoint).mock(return_value=canned)

    xparse = XParseClient(app_id="test", secret_code="test", server_url=server_url)
    schema = {"type": "object", "properties": {"姓名": {"type": "string"}}}
    config = ExtractConfig(schema_=schema, generate_citations=True)

    outcome = xparse.extract.extract(
        file=b"test",
        filename="test.pdf",
        extract_config=config
    )

    # The option name must appear in the request that was sent.
    sent = respx.calls.last.request
    assert b"generate_citations" in sent.content

    assert outcome is not None
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@respx.mock
def test_extract_with_stamp(server_url, extract_success_response):
    """Check that the ``stamp`` (seal extraction) option is included in the request body."""
    endpoint = f"{server_url}/api/xparse/extract/sync"
    canned = httpx.Response(200, json=extract_success_response)
    respx.post(endpoint).mock(return_value=canned)

    xparse = XParseClient(app_id="test", secret_code="test", server_url=server_url)
    schema = {"type": "object", "properties": {"公司名称": {"type": "string"}}}
    config = ExtractConfig(schema_=schema, stamp=True)

    outcome = xparse.extract.extract(
        file=b"test invoice",
        filename="invoice.pdf",
        extract_config=config
    )

    # The option name must appear in the request that was sent.
    sent = respx.calls.last.request
    assert b"stamp" in sent.content

    assert outcome is not None
|