xparse-client 0.3.0b8__py3-none-any.whl → 0.3.0b10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- example/1_basic_api_usage.py +3 -3
- example/2_async_job.py +19 -27
- tests/unit/api/test_parse.py +161 -116
- tests/unit/test_exceptions.py +2 -3
- xparse_client/_http.py +1 -1
- xparse_client/api/parse.py +9 -13
- xparse_client/exceptions.py +5 -5
- {xparse_client-0.3.0b8.dist-info → xparse_client-0.3.0b10.dist-info}/METADATA +22 -18
- {xparse_client-0.3.0b8.dist-info → xparse_client-0.3.0b10.dist-info}/RECORD +12 -12
- {xparse_client-0.3.0b8.dist-info → xparse_client-0.3.0b10.dist-info}/WHEEL +0 -0
- {xparse_client-0.3.0b8.dist-info → xparse_client-0.3.0b10.dist-info}/licenses/LICENSE +0 -0
- {xparse_client-0.3.0b8.dist-info → xparse_client-0.3.0b10.dist-info}/top_level.txt +0 -0
example/1_basic_api_usage.py
CHANGED
|
@@ -95,9 +95,9 @@ def example_2_extract_structured_data():
|
|
|
95
95
|
)
|
|
96
96
|
|
|
97
97
|
print("\n✅ 提取成功!")
|
|
98
|
-
# Extract API 返回的数据在
|
|
99
|
-
if result.
|
|
100
|
-
print(f" 提取的数据: {result.
|
|
98
|
+
# Extract API 返回的数据在 result.result 中
|
|
99
|
+
if result.result:
|
|
100
|
+
print(f" 提取的数据: {result.result}")
|
|
101
101
|
else:
|
|
102
102
|
print(" 未提取到数据")
|
|
103
103
|
|
example/2_async_job.py
CHANGED
|
@@ -22,9 +22,9 @@ from xparse_client.models import ParseConfig
|
|
|
22
22
|
|
|
23
23
|
def example_1_create_and_wait():
|
|
24
24
|
"""示例 2.1: 创建异步任务并等待完成"""
|
|
25
|
-
print("\n" + "="*60)
|
|
25
|
+
print("\n" + "=" * 60)
|
|
26
26
|
print("示例 2.1: 创建异步任务并等待完成")
|
|
27
|
-
print("="*60)
|
|
27
|
+
print("=" * 60)
|
|
28
28
|
|
|
29
29
|
client = XParseClient.from_env()
|
|
30
30
|
|
|
@@ -37,9 +37,7 @@ def example_1_create_and_wait():
|
|
|
37
37
|
print("\n📤 创建异步任务...")
|
|
38
38
|
with open(test_file, "rb") as f:
|
|
39
39
|
job = client.parse.create_async_job(
|
|
40
|
-
file=f,
|
|
41
|
-
filename=test_file.name,
|
|
42
|
-
config=ParseConfig(provider="textin")
|
|
40
|
+
file=f, filename=test_file.name, config=ParseConfig(provider="textin")
|
|
43
41
|
)
|
|
44
42
|
|
|
45
43
|
print("✅ 任务已创建:")
|
|
@@ -47,18 +45,15 @@ def example_1_create_and_wait():
|
|
|
47
45
|
|
|
48
46
|
# 等待任务完成
|
|
49
47
|
print("\n⏳ 等待任务完成...")
|
|
50
|
-
result = client.parse.wait_for_result(
|
|
51
|
-
job_id=job.job_id,
|
|
52
|
-
timeout_seconds=60.0,
|
|
53
|
-
poll_interval_seconds=2.0
|
|
54
|
-
)
|
|
48
|
+
result = client.parse.wait_for_result(job_id=job.job_id, timeout=60.0, poll_interval=2.0)
|
|
55
49
|
|
|
56
50
|
if result.is_completed:
|
|
57
51
|
print("\n✅ 任务完成!")
|
|
58
52
|
print(f" - 任务 ID: {result.job_id}")
|
|
59
53
|
print(f" - 文件 ID: {result.file_id}")
|
|
60
54
|
print(f" - 结果 URL: {result.result_url}")
|
|
61
|
-
print("\n 💡 提示:
|
|
55
|
+
print("\n 💡 提示: 异步任务只返回 result_url,需要另外下载来获取解析结果")
|
|
56
|
+
print(" result_url 是一个可以下载的 JSON 文件 URL")
|
|
62
57
|
elif result.is_failed:
|
|
63
58
|
print(f"\n❌ 任务失败: {result.error_message}")
|
|
64
59
|
else:
|
|
@@ -70,9 +65,9 @@ def example_1_create_and_wait():
|
|
|
70
65
|
|
|
71
66
|
def example_2_manual_polling():
|
|
72
67
|
"""示例 2.2: 手动轮询任务状态"""
|
|
73
|
-
print("\n" + "="*60)
|
|
68
|
+
print("\n" + "=" * 60)
|
|
74
69
|
print("示例 2.2: 手动轮询任务状态")
|
|
75
|
-
print("="*60)
|
|
70
|
+
print("=" * 60)
|
|
76
71
|
|
|
77
72
|
client = XParseClient.from_env()
|
|
78
73
|
|
|
@@ -84,9 +79,7 @@ def example_2_manual_polling():
|
|
|
84
79
|
# 创建异步任务
|
|
85
80
|
with open(test_file, "rb") as f:
|
|
86
81
|
job = client.parse.create_async_job(
|
|
87
|
-
file=f,
|
|
88
|
-
filename=test_file.name,
|
|
89
|
-
config=ParseConfig(provider="textin")
|
|
82
|
+
file=f, filename=test_file.name, config=ParseConfig(provider="textin")
|
|
90
83
|
)
|
|
91
84
|
|
|
92
85
|
print(f"✅ 任务已创建: {job.job_id}")
|
|
@@ -128,9 +121,9 @@ def example_2_manual_polling():
|
|
|
128
121
|
|
|
129
122
|
def example_3_error_handling():
|
|
130
123
|
"""示例 2.3: 异步任务错误处理"""
|
|
131
|
-
print("\n" + "="*60)
|
|
124
|
+
print("\n" + "=" * 60)
|
|
132
125
|
print("示例 2.3: 异步任务错误处理")
|
|
133
|
-
print("="*60)
|
|
126
|
+
print("=" * 60)
|
|
134
127
|
|
|
135
128
|
client = XParseClient.from_env()
|
|
136
129
|
|
|
@@ -142,9 +135,7 @@ def example_3_error_handling():
|
|
|
142
135
|
print("\n📤 创建任务并设置很短的超时时间...")
|
|
143
136
|
with open(test_file, "rb") as f:
|
|
144
137
|
job = client.parse.create_async_job(
|
|
145
|
-
file=f,
|
|
146
|
-
filename=test_file.name,
|
|
147
|
-
config=ParseConfig(provider="textin")
|
|
138
|
+
file=f, filename=test_file.name, config=ParseConfig(provider="textin")
|
|
148
139
|
)
|
|
149
140
|
|
|
150
141
|
print(f"✅ 任务已创建: {job.job_id}")
|
|
@@ -153,8 +144,8 @@ def example_3_error_handling():
|
|
|
153
144
|
try:
|
|
154
145
|
result = client.parse.wait_for_result(
|
|
155
146
|
job_id=job.job_id,
|
|
156
|
-
|
|
157
|
-
|
|
147
|
+
timeout=0.5, # 很短的超时
|
|
148
|
+
poll_interval=0.2,
|
|
158
149
|
)
|
|
159
150
|
|
|
160
151
|
if result.is_completed:
|
|
@@ -173,9 +164,9 @@ def example_3_error_handling():
|
|
|
173
164
|
|
|
174
165
|
def main():
|
|
175
166
|
"""主函数"""
|
|
176
|
-
print("\n" + "="*60)
|
|
167
|
+
print("\n" + "=" * 60)
|
|
177
168
|
print("xparse-client 服务端异步任务示例")
|
|
178
|
-
print("="*60)
|
|
169
|
+
print("=" * 60)
|
|
179
170
|
|
|
180
171
|
# 检查环境变量
|
|
181
172
|
if not os.getenv("TEXTIN_APP_ID") or not os.getenv("TEXTIN_SECRET_CODE"):
|
|
@@ -196,13 +187,14 @@ def main():
|
|
|
196
187
|
example_2_manual_polling()
|
|
197
188
|
example_3_error_handling()
|
|
198
189
|
|
|
199
|
-
print("\n" + "="*60)
|
|
190
|
+
print("\n" + "=" * 60)
|
|
200
191
|
print("✅ 所有示例运行完成!")
|
|
201
|
-
print("="*60)
|
|
192
|
+
print("=" * 60)
|
|
202
193
|
|
|
203
194
|
except Exception as e:
|
|
204
195
|
print(f"\n❌ 错误: {e}")
|
|
205
196
|
import traceback
|
|
197
|
+
|
|
206
198
|
traceback.print_exc()
|
|
207
199
|
|
|
208
200
|
|
tests/unit/api/test_parse.py
CHANGED
|
@@ -59,10 +59,13 @@ class TestParseAPI:
|
|
|
59
59
|
def test_create_async_job(self, client, sample_pdf_bytes):
|
|
60
60
|
"""测试创建异步任务"""
|
|
61
61
|
respx.post(f"{client.config.server_url}/api/xparse/parse/async").mock(
|
|
62
|
-
return_value=httpx.Response(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
62
|
+
return_value=httpx.Response(
|
|
63
|
+
200,
|
|
64
|
+
json={
|
|
65
|
+
"code": 200,
|
|
66
|
+
"data": {"job_id": "job_123"},
|
|
67
|
+
},
|
|
68
|
+
)
|
|
66
69
|
)
|
|
67
70
|
|
|
68
71
|
result = client.parse.create_async_job(
|
|
@@ -76,10 +79,13 @@ class TestParseAPI:
|
|
|
76
79
|
def test_create_async_job_with_config(self, client, sample_pdf_bytes):
|
|
77
80
|
"""测试创建异步任务(带配置)"""
|
|
78
81
|
respx.post(f"{client.config.server_url}/api/xparse/parse/async").mock(
|
|
79
|
-
return_value=httpx.Response(
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
82
|
+
return_value=httpx.Response(
|
|
83
|
+
200,
|
|
84
|
+
json={
|
|
85
|
+
"code": 200,
|
|
86
|
+
"data": {"job_id": "job_456"},
|
|
87
|
+
},
|
|
88
|
+
)
|
|
83
89
|
)
|
|
84
90
|
|
|
85
91
|
result = client.parse.create_async_job(
|
|
@@ -94,10 +100,13 @@ class TestParseAPI:
|
|
|
94
100
|
def test_create_async_job_with_webhook(self, client, sample_pdf_bytes):
|
|
95
101
|
"""测试创建异步任务(带 webhook)"""
|
|
96
102
|
respx.post(f"{client.config.server_url}/api/xparse/parse/async").mock(
|
|
97
|
-
return_value=httpx.Response(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
103
|
+
return_value=httpx.Response(
|
|
104
|
+
200,
|
|
105
|
+
json={
|
|
106
|
+
"code": 200,
|
|
107
|
+
"data": {"job_id": "job_789"},
|
|
108
|
+
},
|
|
109
|
+
)
|
|
101
110
|
)
|
|
102
111
|
|
|
103
112
|
result = client.parse.create_async_job(
|
|
@@ -112,18 +121,19 @@ class TestParseAPI:
|
|
|
112
121
|
def test_get_result_completed(self, client):
|
|
113
122
|
"""测试获取已完成的任务结果"""
|
|
114
123
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
115
|
-
return_value=httpx.Response(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
"
|
|
119
|
-
"
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
124
|
+
return_value=httpx.Response(
|
|
125
|
+
200,
|
|
126
|
+
json={
|
|
127
|
+
"code": 200,
|
|
128
|
+
"data": {
|
|
129
|
+
"job_id": "job_123",
|
|
130
|
+
"file_id": "file_456",
|
|
131
|
+
"status": "completed",
|
|
132
|
+
"result_url": "https://example.com/result.json",
|
|
133
|
+
"elements": [{"element_id": "elem_001", "type": "text", "text": "test"}],
|
|
134
|
+
},
|
|
125
135
|
},
|
|
126
|
-
|
|
136
|
+
)
|
|
127
137
|
)
|
|
128
138
|
|
|
129
139
|
result = client.parse.get_result(job_id="job_123")
|
|
@@ -141,13 +151,16 @@ class TestParseAPI:
|
|
|
141
151
|
def test_get_result_in_progress(self, client):
|
|
142
152
|
"""测试获取进行中的任务"""
|
|
143
153
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
144
|
-
return_value=httpx.Response(
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
"
|
|
148
|
-
"
|
|
154
|
+
return_value=httpx.Response(
|
|
155
|
+
200,
|
|
156
|
+
json={
|
|
157
|
+
"code": 200,
|
|
158
|
+
"data": {
|
|
159
|
+
"job_id": "job_123",
|
|
160
|
+
"status": "in_progress",
|
|
161
|
+
},
|
|
149
162
|
},
|
|
150
|
-
|
|
163
|
+
)
|
|
151
164
|
)
|
|
152
165
|
|
|
153
166
|
result = client.parse.get_result(job_id="job_123")
|
|
@@ -161,13 +174,16 @@ class TestParseAPI:
|
|
|
161
174
|
def test_get_result_scheduled(self, client):
|
|
162
175
|
"""测试获取已调度但未开始的任务"""
|
|
163
176
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
164
|
-
return_value=httpx.Response(
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
"
|
|
168
|
-
"
|
|
177
|
+
return_value=httpx.Response(
|
|
178
|
+
200,
|
|
179
|
+
json={
|
|
180
|
+
"code": 200,
|
|
181
|
+
"data": {
|
|
182
|
+
"job_id": "job_123",
|
|
183
|
+
"status": "scheduled",
|
|
184
|
+
},
|
|
169
185
|
},
|
|
170
|
-
|
|
186
|
+
)
|
|
171
187
|
)
|
|
172
188
|
|
|
173
189
|
result = client.parse.get_result(job_id="job_123")
|
|
@@ -180,14 +196,17 @@ class TestParseAPI:
|
|
|
180
196
|
def test_get_result_failed(self, client):
|
|
181
197
|
"""测试获取失败的任务"""
|
|
182
198
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
183
|
-
return_value=httpx.Response(
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
"
|
|
187
|
-
"
|
|
188
|
-
|
|
199
|
+
return_value=httpx.Response(
|
|
200
|
+
200,
|
|
201
|
+
json={
|
|
202
|
+
"code": 200,
|
|
203
|
+
"data": {
|
|
204
|
+
"job_id": "job_123",
|
|
205
|
+
"status": "failed",
|
|
206
|
+
"error_message": "处理失败",
|
|
207
|
+
},
|
|
189
208
|
},
|
|
190
|
-
|
|
209
|
+
)
|
|
191
210
|
)
|
|
192
211
|
|
|
193
212
|
result = client.parse.get_result(job_id="job_123")
|
|
@@ -198,35 +217,41 @@ class TestParseAPI:
|
|
|
198
217
|
assert result.error_message == "处理失败"
|
|
199
218
|
|
|
200
219
|
@respx.mock
|
|
201
|
-
@patch(
|
|
220
|
+
@patch("time.sleep")
|
|
202
221
|
def test_wait_for_result_success(self, mock_sleep, client):
|
|
203
222
|
"""测试等待任务完成(成功)"""
|
|
204
223
|
# 第一次查询: 进行中
|
|
205
224
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
206
225
|
side_effect=[
|
|
207
|
-
httpx.Response(
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
"
|
|
211
|
-
"
|
|
226
|
+
httpx.Response(
|
|
227
|
+
200,
|
|
228
|
+
json={
|
|
229
|
+
"code": 200,
|
|
230
|
+
"data": {
|
|
231
|
+
"job_id": "job_123",
|
|
232
|
+
"status": "in_progress",
|
|
233
|
+
},
|
|
212
234
|
},
|
|
213
|
-
|
|
214
|
-
httpx.Response(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
"
|
|
218
|
-
"
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
235
|
+
),
|
|
236
|
+
httpx.Response(
|
|
237
|
+
200,
|
|
238
|
+
json={
|
|
239
|
+
"code": 200,
|
|
240
|
+
"data": {
|
|
241
|
+
"job_id": "job_123",
|
|
242
|
+
"status": "completed",
|
|
243
|
+
"elements": [
|
|
244
|
+
{"element_id": "elem_001", "type": "text", "text": "test"}
|
|
245
|
+
],
|
|
246
|
+
},
|
|
222
247
|
},
|
|
223
|
-
|
|
248
|
+
),
|
|
224
249
|
]
|
|
225
250
|
)
|
|
226
251
|
|
|
227
252
|
result = client.parse.wait_for_result(
|
|
228
253
|
job_id="job_123",
|
|
229
|
-
|
|
254
|
+
poll_interval=1,
|
|
230
255
|
)
|
|
231
256
|
|
|
232
257
|
assert result.is_completed
|
|
@@ -236,18 +261,21 @@ class TestParseAPI:
|
|
|
236
261
|
mock_sleep.assert_called_with(1)
|
|
237
262
|
|
|
238
263
|
@respx.mock
|
|
239
|
-
@patch(
|
|
264
|
+
@patch("time.sleep")
|
|
240
265
|
def test_wait_for_result_immediate_completion(self, mock_sleep, client):
|
|
241
266
|
"""测试等待任务完成(立即完成)"""
|
|
242
267
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
243
|
-
return_value=httpx.Response(
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
"
|
|
247
|
-
"
|
|
248
|
-
|
|
268
|
+
return_value=httpx.Response(
|
|
269
|
+
200,
|
|
270
|
+
json={
|
|
271
|
+
"code": 200,
|
|
272
|
+
"data": {
|
|
273
|
+
"job_id": "job_123",
|
|
274
|
+
"status": "completed",
|
|
275
|
+
"elements": [],
|
|
276
|
+
},
|
|
249
277
|
},
|
|
250
|
-
|
|
278
|
+
)
|
|
251
279
|
)
|
|
252
280
|
|
|
253
281
|
result = client.parse.wait_for_result(job_id="job_123")
|
|
@@ -257,18 +285,21 @@ class TestParseAPI:
|
|
|
257
285
|
assert mock_sleep.call_count == 0
|
|
258
286
|
|
|
259
287
|
@respx.mock
|
|
260
|
-
@patch(
|
|
288
|
+
@patch("time.sleep")
|
|
261
289
|
def test_wait_for_result_task_failed(self, mock_sleep, client):
|
|
262
290
|
"""测试等待任务完成(任务失败)"""
|
|
263
291
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
264
|
-
return_value=httpx.Response(
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
"
|
|
268
|
-
"
|
|
269
|
-
|
|
292
|
+
return_value=httpx.Response(
|
|
293
|
+
200,
|
|
294
|
+
json={
|
|
295
|
+
"code": 200,
|
|
296
|
+
"data": {
|
|
297
|
+
"job_id": "job_123",
|
|
298
|
+
"status": "failed",
|
|
299
|
+
"error_message": "文件格式不支持",
|
|
300
|
+
},
|
|
270
301
|
},
|
|
271
|
-
|
|
302
|
+
)
|
|
272
303
|
)
|
|
273
304
|
|
|
274
305
|
with pytest.raises(APIError) as exc_info:
|
|
@@ -278,62 +309,74 @@ class TestParseAPI:
|
|
|
278
309
|
assert "文件格式不支持" in str(exc_info.value)
|
|
279
310
|
|
|
280
311
|
@respx.mock
|
|
281
|
-
@patch(
|
|
282
|
-
@patch(
|
|
312
|
+
@patch("time.time")
|
|
313
|
+
@patch("time.sleep")
|
|
283
314
|
def test_wait_for_result_timeout(self, mock_sleep, mock_time, client):
|
|
284
315
|
"""测试等待任务超时"""
|
|
285
316
|
# Mock time.time() 返回递增的时间
|
|
286
317
|
mock_time.side_effect = [0, 5, 11] # 第一次0秒,第二次5秒,第三次11秒
|
|
287
318
|
|
|
288
319
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
289
|
-
return_value=httpx.Response(
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
"
|
|
293
|
-
"
|
|
320
|
+
return_value=httpx.Response(
|
|
321
|
+
200,
|
|
322
|
+
json={
|
|
323
|
+
"code": 200,
|
|
324
|
+
"data": {
|
|
325
|
+
"job_id": "job_123",
|
|
326
|
+
"status": "in_progress",
|
|
327
|
+
},
|
|
294
328
|
},
|
|
295
|
-
|
|
329
|
+
)
|
|
296
330
|
)
|
|
297
331
|
|
|
298
332
|
with pytest.raises(RequestTimeoutError) as exc_info:
|
|
299
333
|
client.parse.wait_for_result(
|
|
300
334
|
job_id="job_123",
|
|
301
|
-
|
|
302
|
-
|
|
335
|
+
timeout=10,
|
|
336
|
+
poll_interval=5,
|
|
303
337
|
)
|
|
304
338
|
|
|
305
339
|
assert "等待解析任务超时" in str(exc_info.value)
|
|
306
340
|
assert "job_123" in str(exc_info.value)
|
|
307
341
|
|
|
308
342
|
@respx.mock
|
|
309
|
-
@patch(
|
|
343
|
+
@patch("time.sleep")
|
|
310
344
|
def test_wait_for_result_multiple_polls(self, mock_sleep, client):
|
|
311
345
|
"""测试多次轮询后完成"""
|
|
312
346
|
# 模拟 3 次轮询: scheduled -> in_progress -> completed
|
|
313
347
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
314
348
|
side_effect=[
|
|
315
|
-
httpx.Response(
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
"job_id": "job_123",
|
|
327
|
-
"status": "completed",
|
|
328
|
-
"elements": [],
|
|
349
|
+
httpx.Response(
|
|
350
|
+
200,
|
|
351
|
+
json={
|
|
352
|
+
"code": 200,
|
|
353
|
+
"data": {"job_id": "job_123", "status": "scheduled"},
|
|
354
|
+
},
|
|
355
|
+
),
|
|
356
|
+
httpx.Response(
|
|
357
|
+
200,
|
|
358
|
+
json={
|
|
359
|
+
"code": 200,
|
|
360
|
+
"data": {"job_id": "job_123", "status": "in_progress"},
|
|
329
361
|
},
|
|
330
|
-
|
|
362
|
+
),
|
|
363
|
+
httpx.Response(
|
|
364
|
+
200,
|
|
365
|
+
json={
|
|
366
|
+
"code": 200,
|
|
367
|
+
"data": {
|
|
368
|
+
"job_id": "job_123",
|
|
369
|
+
"status": "completed",
|
|
370
|
+
"elements": [],
|
|
371
|
+
},
|
|
372
|
+
},
|
|
373
|
+
),
|
|
331
374
|
]
|
|
332
375
|
)
|
|
333
376
|
|
|
334
377
|
result = client.parse.wait_for_result(
|
|
335
378
|
job_id="job_123",
|
|
336
|
-
|
|
379
|
+
poll_interval=2,
|
|
337
380
|
)
|
|
338
381
|
|
|
339
382
|
assert result.is_completed
|
|
@@ -344,31 +387,33 @@ class TestParseAPI:
|
|
|
344
387
|
def test_wait_for_result_custom_intervals(self, client):
|
|
345
388
|
"""测试自定义轮询参数"""
|
|
346
389
|
respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
|
|
347
|
-
return_value=httpx.Response(
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
"
|
|
351
|
-
"
|
|
352
|
-
|
|
390
|
+
return_value=httpx.Response(
|
|
391
|
+
200,
|
|
392
|
+
json={
|
|
393
|
+
"code": 200,
|
|
394
|
+
"data": {
|
|
395
|
+
"job_id": "job_123",
|
|
396
|
+
"status": "completed",
|
|
397
|
+
"elements": [],
|
|
398
|
+
},
|
|
353
399
|
},
|
|
354
|
-
|
|
400
|
+
)
|
|
355
401
|
)
|
|
356
402
|
|
|
357
403
|
result = client.parse.wait_for_result(
|
|
358
404
|
job_id="job_123",
|
|
359
|
-
|
|
360
|
-
|
|
405
|
+
timeout=300,
|
|
406
|
+
poll_interval=10,
|
|
361
407
|
)
|
|
362
408
|
|
|
363
409
|
assert result.is_completed
|
|
364
410
|
|
|
365
411
|
def test_partition_async_removed(self, client):
|
|
366
412
|
"""测试 partition_async 方法已被移除"""
|
|
367
|
-
assert not hasattr(client.parse,
|
|
413
|
+
assert not hasattr(client.parse, "partition_async")
|
|
368
414
|
|
|
369
415
|
def test_async_job_methods_still_exist(self, client):
|
|
370
416
|
"""测试服务端异步任务方法仍然存在"""
|
|
371
|
-
assert hasattr(client.parse,
|
|
372
|
-
assert hasattr(client.parse,
|
|
373
|
-
assert hasattr(client.parse,
|
|
374
|
-
|
|
417
|
+
assert hasattr(client.parse, "create_async_job")
|
|
418
|
+
assert hasattr(client.parse, "get_result")
|
|
419
|
+
assert hasattr(client.parse, "wait_for_result")
|
tests/unit/test_exceptions.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""异常类测试"""
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
from xparse_client.exceptions import (
|
|
5
4
|
APIError,
|
|
6
5
|
AuthenticationError,
|
|
@@ -137,9 +136,9 @@ class TestAPIError:
|
|
|
137
136
|
"""测试超时错误"""
|
|
138
137
|
error = RequestTimeoutError(
|
|
139
138
|
"请求超时",
|
|
140
|
-
|
|
139
|
+
timeout=30.0,
|
|
141
140
|
)
|
|
142
|
-
assert error.
|
|
141
|
+
assert error.timeout == 30.0
|
|
143
142
|
|
|
144
143
|
|
|
145
144
|
class TestConnectorError:
|
xparse_client/_http.py
CHANGED
xparse_client/api/parse.py
CHANGED
|
@@ -86,7 +86,6 @@ class Parse(BaseAPI):
|
|
|
86
86
|
response = self._post("/parse/sync", files=files, data=data)
|
|
87
87
|
return self._parse_response(response, ParseResponse)
|
|
88
88
|
|
|
89
|
-
|
|
90
89
|
def create_async_job(
|
|
91
90
|
self,
|
|
92
91
|
*,
|
|
@@ -127,7 +126,6 @@ class Parse(BaseAPI):
|
|
|
127
126
|
response = self._post("/parse/async", files=files, data=data)
|
|
128
127
|
return self._parse_response(response, AsyncJobResponse)
|
|
129
128
|
|
|
130
|
-
|
|
131
129
|
def get_result(self, *, job_id: str) -> JobStatusResponse:
|
|
132
130
|
"""获取异步解析结果
|
|
133
131
|
|
|
@@ -149,13 +147,12 @@ class Parse(BaseAPI):
|
|
|
149
147
|
response = self._get(f"/parse/async/{job_id}")
|
|
150
148
|
return self._parse_response(response, JobStatusResponse)
|
|
151
149
|
|
|
152
|
-
|
|
153
150
|
def wait_for_result(
|
|
154
151
|
self,
|
|
155
152
|
*,
|
|
156
153
|
job_id: str,
|
|
157
|
-
|
|
158
|
-
|
|
154
|
+
timeout: float = 3600,
|
|
155
|
+
poll_interval: float = 5,
|
|
159
156
|
) -> JobStatusResponse:
|
|
160
157
|
"""等待异步任务完成
|
|
161
158
|
|
|
@@ -163,8 +160,8 @@ class Parse(BaseAPI):
|
|
|
163
160
|
|
|
164
161
|
Args:
|
|
165
162
|
job_id: 任务 ID
|
|
166
|
-
|
|
167
|
-
|
|
163
|
+
timeout: 超时时间(秒),默认 3600
|
|
164
|
+
poll_interval: 轮询间隔(秒),默认 5
|
|
168
165
|
|
|
169
166
|
Returns:
|
|
170
167
|
JobStatusResponse: 完成的任务结果
|
|
@@ -177,8 +174,8 @@ class Parse(BaseAPI):
|
|
|
177
174
|
>>> job = client.parse.create_async_job(file=file_bytes, filename="doc.pdf")
|
|
178
175
|
>>> result = client.parse.wait_for_result(
|
|
179
176
|
... job_id=job.job_id,
|
|
180
|
-
...
|
|
181
|
-
...
|
|
177
|
+
... timeout=600,
|
|
178
|
+
... poll_interval=10
|
|
182
179
|
... )
|
|
183
180
|
"""
|
|
184
181
|
start_time = time.time()
|
|
@@ -196,14 +193,13 @@ class Parse(BaseAPI):
|
|
|
196
193
|
)
|
|
197
194
|
|
|
198
195
|
elapsed = time.time() - start_time
|
|
199
|
-
if elapsed >
|
|
196
|
+
if elapsed > timeout:
|
|
200
197
|
raise RequestTimeoutError(
|
|
201
198
|
f"等待解析任务超时: {job_id}",
|
|
202
|
-
|
|
199
|
+
timeout=timeout,
|
|
203
200
|
)
|
|
204
201
|
|
|
205
|
-
time.sleep(
|
|
206
|
-
|
|
202
|
+
time.sleep(poll_interval)
|
|
207
203
|
|
|
208
204
|
|
|
209
205
|
__all__ = ["Parse"]
|
xparse_client/exceptions.py
CHANGED
|
@@ -248,21 +248,21 @@ class RequestTimeoutError(APIError):
|
|
|
248
248
|
请求超时时抛出。
|
|
249
249
|
|
|
250
250
|
Attributes:
|
|
251
|
-
|
|
251
|
+
timeout: 超时时间设置
|
|
252
252
|
"""
|
|
253
253
|
|
|
254
254
|
def __init__(
|
|
255
255
|
self,
|
|
256
256
|
message: str,
|
|
257
257
|
*,
|
|
258
|
-
|
|
258
|
+
timeout: float | None = None,
|
|
259
259
|
**kwargs: Any,
|
|
260
260
|
) -> None:
|
|
261
|
-
self.
|
|
261
|
+
self.timeout = timeout
|
|
262
262
|
|
|
263
263
|
details = kwargs.pop("details", {}) or {}
|
|
264
|
-
if
|
|
265
|
-
details["timeout_seconds"] =
|
|
264
|
+
if timeout:
|
|
265
|
+
details["timeout_seconds"] = timeout
|
|
266
266
|
|
|
267
267
|
super().__init__(message, details=details, **kwargs)
|
|
268
268
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xparse-client
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.0b10
|
|
4
4
|
Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
|
|
5
5
|
Author-email: INTSIG-TEXTIN <support@textin.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -223,6 +223,7 @@ with open("document.pdf", "rb") as f:
|
|
|
223
223
|
extract_config=ExtractConfig(schema=schema)
|
|
224
224
|
)
|
|
225
225
|
|
|
226
|
+
# Extract API 返回的结构化数据在 result.result 中
|
|
226
227
|
print(result.result)
|
|
227
228
|
```
|
|
228
229
|
|
|
@@ -277,22 +278,11 @@ result = client.parse.wait_for_result(
|
|
|
277
278
|
poll_interval=5.0
|
|
278
279
|
)
|
|
279
280
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
```python
|
|
286
|
-
# 同步调用(适合小文件)
|
|
287
|
-
result = client.parse.partition(file=f, filename="doc.pdf")
|
|
288
|
-
|
|
289
|
-
# 异步调用(相同接口,加上 _async 后缀)
|
|
290
|
-
import asyncio
|
|
291
|
-
|
|
292
|
-
async def main():
|
|
293
|
-
result = await client.parse.partition_async(file=f, filename="doc.pdf")
|
|
294
|
-
|
|
295
|
-
asyncio.run(main())
|
|
281
|
+
if result.is_completed:
|
|
282
|
+
print(f"任务完成,结果 URL: {result.result_url}")
|
|
283
|
+
print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
|
|
284
|
+
elif result.is_failed:
|
|
285
|
+
print(f"任务失败: {result.error_message}")
|
|
296
286
|
```
|
|
297
287
|
|
|
298
288
|
---
|
|
@@ -326,7 +316,17 @@ result = client.pipeline.execute(
|
|
|
326
316
|
|
|
327
317
|
### 📊 详细的处理统计
|
|
328
318
|
|
|
319
|
+
Pipeline API 返回详细的处理统计信息:
|
|
320
|
+
|
|
329
321
|
```python
|
|
322
|
+
# 使用 Pipeline API 时可以获取统计信息
|
|
323
|
+
result = client.pipeline.execute(
|
|
324
|
+
file=f,
|
|
325
|
+
filename="document.pdf",
|
|
326
|
+
stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
# 访问统计信息
|
|
330
330
|
print(f"原始元素: {result.stats.original_elements}")
|
|
331
331
|
print(f"分块后: {result.stats.chunked_elements}")
|
|
332
332
|
print(f"向量化: {result.stats.embedded_elements}")
|
|
@@ -750,11 +750,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
|
|
|
750
750
|
|
|
751
751
|
# 方式 1:自动等待完成
|
|
752
752
|
result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
|
|
753
|
+
if result.is_completed:
|
|
754
|
+
print(f"任务完成,结果 URL: {result.result_url}")
|
|
755
|
+
# 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
|
|
753
756
|
|
|
754
757
|
# 方式 2:手动轮询
|
|
755
758
|
while True:
|
|
756
759
|
status = client.parse.get_result(job_id=job.job_id)
|
|
757
|
-
if status.
|
|
760
|
+
if status.is_completed:
|
|
761
|
+
print(f"结果 URL: {status.result_url}")
|
|
758
762
|
break
|
|
759
763
|
time.sleep(5)
|
|
760
764
|
```
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
example/1_basic_api_usage.py,sha256=
|
|
2
|
-
example/2_async_job.py,sha256=
|
|
1
|
+
example/1_basic_api_usage.py,sha256=wUng_3X-6v_dEWD0CvTKzoYUhvNkYlCML0PE2gCPC1Q,5700
|
|
2
|
+
example/2_async_job.py,sha256=n_2nv_GJPJ1t2-6yHGWK1K7uHd8InG63s8C6bL96iS8,6313
|
|
3
3
|
example/3_local_workflow.py,sha256=ZxoT8N0Nz4_s-bkLF0l6UDqBJk_eZZi99WUtUhJseXs,8980
|
|
4
4
|
example/4_advanced_workflow.py,sha256=KH6gmzoKi6oeHIPDv9snw1YehynHTDqWVq1WFtoqGws,10156
|
|
5
5
|
example/README.md,sha256=d-DumaZwbV96K6ZTEryAlDSk9HUhRBg0gG_Yekx9PqA,2336
|
|
@@ -9,12 +9,12 @@ tests/unit/__init__.py,sha256=C1308ox8hFFtq3A1vBAVPzL6r8HGVbEbC42ULCpFkjs,25
|
|
|
9
9
|
tests/unit/test_base.py,sha256=HRW2irYNAX863PBCtJWvN9EJ_UttfOwRI7CRnDdcjEk,11599
|
|
10
10
|
tests/unit/test_client.py,sha256=92geEJ6jBht7nFr5suRbJ6SmkvzCu_HGB5MhSl4Qmr8,3534
|
|
11
11
|
tests/unit/test_config.py,sha256=OAUKvmEtpC-eAglnDzhp1bUyMlUpaeqLAa1QwyxCUdw,5498
|
|
12
|
-
tests/unit/test_exceptions.py,sha256=
|
|
12
|
+
tests/unit/test_exceptions.py,sha256=H_DfISn5SGEiOsFoLBNK11JVViCyfWv5a0kMfnuzfkY,5138
|
|
13
13
|
tests/unit/test_http.py,sha256=3h-O6k5rcVP_I1sPoISOYLdHAKtSxoPrDOfC3EJyNi0,16490
|
|
14
14
|
tests/unit/api/__init__.py,sha256=5HiLverbqHC-yFxXClrHBjHq62KoONLZPLMMOkYR3eU,23
|
|
15
15
|
tests/unit/api/test_extract.py,sha256=QX4_BAQN6MkRTJQocEf9u6HwOA_XoY_a2djMcuLBCTo,6959
|
|
16
16
|
tests/unit/api/test_local.py,sha256=_DXC8PattJ-CH9inlwDu9s1RCocTGVeYpKFpTiwDwa4,7541
|
|
17
|
-
tests/unit/api/test_parse.py,sha256=
|
|
17
|
+
tests/unit/api/test_parse.py,sha256=2I9RKmgCEyMy-i_g9r4j-qwsKtdcm7lIrZdhmv6t4PU,13724
|
|
18
18
|
tests/unit/api/test_pipeline.py,sha256=0TC_upZbSllQfuz7uGOCvx28BDNCnX1POR92JLnfrIE,11067
|
|
19
19
|
tests/unit/api/test_workflows.py,sha256=lYVbOWph8ZqxszFQTgcFHnxgdp9jjJM35F-wKO3IUuY,3286
|
|
20
20
|
tests/unit/connectors/test_ftp.py,sha256=7NbV7cL9tIy03L7_sWoALrfDmjda_xOgfAoBfUnF9ks,14605
|
|
@@ -31,12 +31,12 @@ xparse_client/__init__.py,sha256=FyR7W0eteapDoPj2pU4VoTobM8XR2B7f_g4yVI-v2v8,395
|
|
|
31
31
|
xparse_client/_base.py,sha256=IPYdHREM0CUvZ_kWUp4olYQfsfKzPTQoZuIrNSanKq4,4661
|
|
32
32
|
xparse_client/_client.py,sha256=ZGxZ9xhUitTP-eRhte57ekqUkrXxNubFDL5gojajA2s,6964
|
|
33
33
|
xparse_client/_config.py,sha256=BG3cEW5ywKhmtfHjfTFrdezs2Ugc9P2Y85tzu5OZZ4M,6480
|
|
34
|
-
xparse_client/_http.py,sha256=
|
|
35
|
-
xparse_client/exceptions.py,sha256=
|
|
34
|
+
xparse_client/_http.py,sha256=UMPAYZVfEmF8m718s084at6PQtHxiQDqAlv2XgmDuYc,11435
|
|
35
|
+
xparse_client/exceptions.py,sha256=pv0RhaOPsd7NrRCzKjJjqErNG52xPPrXQ7IzWNb-dvI,9473
|
|
36
36
|
xparse_client/api/__init__.py,sha256=MZ3wBTDU666efOBv81MKKtc1UkjUsjzIEOpsnDUAHD4,203
|
|
37
37
|
xparse_client/api/extract.py,sha256=87tH-y4sXPKW35l-d0GmZdK9RIXWq4tfmGZXYJhIN60,3011
|
|
38
38
|
xparse_client/api/local.py,sha256=dARW6RjiNsxliWxs7cB6qQl9eTZNvyjBpOG8qXyaIaw,8214
|
|
39
|
-
xparse_client/api/parse.py,sha256=
|
|
39
|
+
xparse_client/api/parse.py,sha256=2TBuWg8FY9Vteb06mWcNUfPsTPiIcg8g3eoKtaqP9W4,5786
|
|
40
40
|
xparse_client/api/pipeline.py,sha256=bPCSpWjER-7c7L0iMQ_Xz1VTe5DARMsX5XIcrayGKzw,4530
|
|
41
41
|
xparse_client/api/workflows.py,sha256=29_Rf4bIH1fiJb_bWShkUm-et97ym5ymQFh5i3b5mDI,5945
|
|
42
42
|
xparse_client/connectors/__init__.py,sha256=BLGxk9i8BsVCr4RMrCIh8b9cywBqXXmcDHqfF7C_FX0,1113
|
|
@@ -61,8 +61,8 @@ xparse_client/models/local.py,sha256=hqKmyWTU_EhPk6qybtBzmbLybBBaiasTpqM4_zP-ipo
|
|
|
61
61
|
xparse_client/models/parse.py,sha256=COgxroHkJce_S7d2HGd_1zBYhkpCutGtmjyLnpI2_eI,3154
|
|
62
62
|
xparse_client/models/pipeline.py,sha256=24bDzhrVotQ8St6VLEJLvG2cZF0G2AMioKI34I_hJXI,3297
|
|
63
63
|
xparse_client/models/workflows.py,sha256=BivMdGOAmhP6oYLQSGAAN7yml2xb7vHHrpzwLgN_Afk,1754
|
|
64
|
-
xparse_client-0.3.
|
|
65
|
-
xparse_client-0.3.
|
|
66
|
-
xparse_client-0.3.
|
|
67
|
-
xparse_client-0.3.
|
|
68
|
-
xparse_client-0.3.
|
|
64
|
+
xparse_client-0.3.0b10.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
|
|
65
|
+
xparse_client-0.3.0b10.dist-info/METADATA,sha256=SV-pasi2iASwYXsWFRdLli2uGIQpGAeBfqE5eXM-HMg,25873
|
|
66
|
+
xparse_client-0.3.0b10.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
67
|
+
xparse_client-0.3.0b10.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
|
|
68
|
+
xparse_client-0.3.0b10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|