xparse-client 0.3.0b8__tar.gz → 0.3.0b10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {xparse_client-0.3.0b8/xparse_client.egg-info → xparse_client-0.3.0b10}/PKG-INFO +22 -18
  2. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/README.md +21 -17
  3. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/example/1_basic_api_usage.py +3 -3
  4. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/example/2_async_job.py +19 -27
  5. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/pyproject.toml +1 -1
  6. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/api/test_parse.py +161 -116
  7. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/test_exceptions.py +2 -3
  8. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/_http.py +1 -1
  9. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/api/parse.py +9 -13
  10. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/exceptions.py +5 -5
  11. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10/xparse_client.egg-info}/PKG-INFO +22 -18
  12. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/CHANGELOG.md +0 -0
  13. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/LICENSE +0 -0
  14. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/MANIFEST.in +0 -0
  15. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/example/3_local_workflow.py +0 -0
  16. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/example/4_advanced_workflow.py +0 -0
  17. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/example/README.md +0 -0
  18. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/example/config_example.json +0 -0
  19. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/setup.cfg +0 -0
  20. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/conftest.py +0 -0
  21. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/__init__.py +0 -0
  22. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/api/__init__.py +0 -0
  23. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/api/test_extract.py +0 -0
  24. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/api/test_local.py +0 -0
  25. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/api/test_pipeline.py +0 -0
  26. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/api/test_workflows.py +0 -0
  27. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_ftp.py +0 -0
  28. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_local_connectors.py +0 -0
  29. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_milvus.py +0 -0
  30. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_qdrant.py +0 -0
  31. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_s3.py +0 -0
  32. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_smb.py +0 -0
  33. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/connectors/test_utils.py +0 -0
  34. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/models/test_local.py +0 -0
  35. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/models/test_pipeline_stages.py +0 -0
  36. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/models/test_workflows.py +0 -0
  37. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/test_base.py +0 -0
  38. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/test_client.py +0 -0
  39. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/test_config.py +0 -0
  40. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/tests/unit/test_http.py +0 -0
  41. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/__init__.py +0 -0
  42. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/_base.py +0 -0
  43. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/_client.py +0 -0
  44. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/_config.py +0 -0
  45. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/api/__init__.py +0 -0
  46. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/api/extract.py +0 -0
  47. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/api/local.py +0 -0
  48. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/api/pipeline.py +0 -0
  49. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/api/workflows.py +0 -0
  50. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/__init__.py +0 -0
  51. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/_utils.py +0 -0
  52. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/destinations/__init__.py +0 -0
  53. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/destinations/base.py +0 -0
  54. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/destinations/local.py +0 -0
  55. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/destinations/milvus.py +0 -0
  56. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/destinations/qdrant.py +0 -0
  57. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/destinations/s3.py +0 -0
  58. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/sources/__init__.py +0 -0
  59. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/sources/base.py +0 -0
  60. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/sources/ftp.py +0 -0
  61. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/sources/local.py +0 -0
  62. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/sources/s3.py +0 -0
  63. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/connectors/sources/smb.py +0 -0
  64. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/__init__.py +0 -0
  65. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/chunk.py +0 -0
  66. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/embed.py +0 -0
  67. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/extract.py +0 -0
  68. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/local.py +0 -0
  69. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/parse.py +0 -0
  70. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/pipeline.py +0 -0
  71. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client/models/workflows.py +0 -0
  72. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client.egg-info/SOURCES.txt +0 -0
  73. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client.egg-info/dependency_links.txt +0 -0
  74. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client.egg-info/requires.txt +0 -0
  75. {xparse_client-0.3.0b8 → xparse_client-0.3.0b10}/xparse_client.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b8
3
+ Version: 0.3.0b10
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -223,6 +223,7 @@ with open("document.pdf", "rb") as f:
223
223
  extract_config=ExtractConfig(schema=schema)
224
224
  )
225
225
 
226
+ # Extract API 返回的结构化数据在 result.result 中
226
227
  print(result.result)
227
228
  ```
228
229
 
@@ -277,22 +278,11 @@ result = client.parse.wait_for_result(
277
278
  poll_interval=5.0
278
279
  )
279
280
 
280
- print(f"任务完成,解析出 {len(result.result.elements)} 个元素")
281
- ```
282
-
283
- ### 同步 vs 异步
284
-
285
- ```python
286
- # 同步调用(适合小文件)
287
- result = client.parse.partition(file=f, filename="doc.pdf")
288
-
289
- # 异步调用(相同接口,加上 _async 后缀)
290
- import asyncio
291
-
292
- async def main():
293
- result = await client.parse.partition_async(file=f, filename="doc.pdf")
294
-
295
- asyncio.run(main())
281
+ if result.is_completed:
282
+ print(f"任务完成,结果 URL: {result.result_url}")
283
+ print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
284
+ elif result.is_failed:
285
+ print(f"任务失败: {result.error_message}")
296
286
  ```
297
287
 
298
288
  ---
@@ -326,7 +316,17 @@ result = client.pipeline.execute(
326
316
 
327
317
  ### 📊 详细的处理统计
328
318
 
319
+ Pipeline API 返回详细的处理统计信息:
320
+
329
321
  ```python
322
+ # 使用 Pipeline API 时可以获取统计信息
323
+ result = client.pipeline.execute(
324
+ file=f,
325
+ filename="document.pdf",
326
+ stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
327
+ )
328
+
329
+ # 访问统计信息
330
330
  print(f"原始元素: {result.stats.original_elements}")
331
331
  print(f"分块后: {result.stats.chunked_elements}")
332
332
  print(f"向量化: {result.stats.embedded_elements}")
@@ -750,11 +750,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
750
750
 
751
751
  # 方式 1:自动等待完成
752
752
  result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
753
+ if result.is_completed:
754
+ print(f"任务完成,结果 URL: {result.result_url}")
755
+ # 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
753
756
 
754
757
  # 方式 2:手动轮询
755
758
  while True:
756
759
  status = client.parse.get_result(job_id=job.job_id)
757
- if status.status == "completed":
760
+ if status.is_completed:
761
+ print(f"结果 URL: {status.result_url}")
758
762
  break
759
763
  time.sleep(5)
760
764
  ```
@@ -174,6 +174,7 @@ with open("document.pdf", "rb") as f:
174
174
  extract_config=ExtractConfig(schema=schema)
175
175
  )
176
176
 
177
+ # Extract API 返回的结构化数据在 result.result 中
177
178
  print(result.result)
178
179
  ```
179
180
 
@@ -228,22 +229,11 @@ result = client.parse.wait_for_result(
228
229
  poll_interval=5.0
229
230
  )
230
231
 
231
- print(f"任务完成,解析出 {len(result.result.elements)} 个元素")
232
- ```
233
-
234
- ### 同步 vs 异步
235
-
236
- ```python
237
- # 同步调用(适合小文件)
238
- result = client.parse.partition(file=f, filename="doc.pdf")
239
-
240
- # 异步调用(相同接口,加上 _async 后缀)
241
- import asyncio
242
-
243
- async def main():
244
- result = await client.parse.partition_async(file=f, filename="doc.pdf")
245
-
246
- asyncio.run(main())
232
+ if result.is_completed:
233
+ print(f"任务完成,结果 URL: {result.result_url}")
234
+ print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
235
+ elif result.is_failed:
236
+ print(f"任务失败: {result.error_message}")
247
237
  ```
248
238
 
249
239
  ---
@@ -277,7 +267,17 @@ result = client.pipeline.execute(
277
267
 
278
268
  ### 📊 详细的处理统计
279
269
 
270
+ Pipeline API 返回详细的处理统计信息:
271
+
280
272
  ```python
273
+ # 使用 Pipeline API 时可以获取统计信息
274
+ result = client.pipeline.execute(
275
+ file=f,
276
+ filename="document.pdf",
277
+ stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
278
+ )
279
+
280
+ # 访问统计信息
281
281
  print(f"原始元素: {result.stats.original_elements}")
282
282
  print(f"分块后: {result.stats.chunked_elements}")
283
283
  print(f"向量化: {result.stats.embedded_elements}")
@@ -701,11 +701,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
701
701
 
702
702
  # 方式 1:自动等待完成
703
703
  result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
704
+ if result.is_completed:
705
+ print(f"任务完成,结果 URL: {result.result_url}")
706
+ # 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
704
707
 
705
708
  # 方式 2:手动轮询
706
709
  while True:
707
710
  status = client.parse.get_result(job_id=job.job_id)
708
- if status.status == "completed":
711
+ if status.is_completed:
712
+ print(f"结果 URL: {status.result_url}")
709
713
  break
710
714
  time.sleep(5)
711
715
  ```
@@ -95,9 +95,9 @@ def example_2_extract_structured_data():
95
95
  )
96
96
 
97
97
  print("\n✅ 提取成功!")
98
- # Extract API 返回的数据在 extract_result.extracted_schema
99
- if result.extract_result and "extracted_schema" in result.extract_result:
100
- print(f" 提取的数据: {result.extract_result['extracted_schema']}")
98
+ # Extract API 返回的数据在 result.result
99
+ if result.result:
100
+ print(f" 提取的数据: {result.result}")
101
101
  else:
102
102
  print(" 未提取到数据")
103
103
 
@@ -22,9 +22,9 @@ from xparse_client.models import ParseConfig
22
22
 
23
23
  def example_1_create_and_wait():
24
24
  """示例 2.1: 创建异步任务并等待完成"""
25
- print("\n" + "="*60)
25
+ print("\n" + "=" * 60)
26
26
  print("示例 2.1: 创建异步任务并等待完成")
27
- print("="*60)
27
+ print("=" * 60)
28
28
 
29
29
  client = XParseClient.from_env()
30
30
 
@@ -37,9 +37,7 @@ def example_1_create_and_wait():
37
37
  print("\n📤 创建异步任务...")
38
38
  with open(test_file, "rb") as f:
39
39
  job = client.parse.create_async_job(
40
- file=f,
41
- filename=test_file.name,
42
- config=ParseConfig(provider="textin")
40
+ file=f, filename=test_file.name, config=ParseConfig(provider="textin")
43
41
  )
44
42
 
45
43
  print("✅ 任务已创建:")
@@ -47,18 +45,15 @@ def example_1_create_and_wait():
47
45
 
48
46
  # 等待任务完成
49
47
  print("\n⏳ 等待任务完成...")
50
- result = client.parse.wait_for_result(
51
- job_id=job.job_id,
52
- timeout_seconds=60.0,
53
- poll_interval_seconds=2.0
54
- )
48
+ result = client.parse.wait_for_result(job_id=job.job_id, timeout=60.0, poll_interval=2.0)
55
49
 
56
50
  if result.is_completed:
57
51
  print("\n✅ 任务完成!")
58
52
  print(f" - 任务 ID: {result.job_id}")
59
53
  print(f" - 文件 ID: {result.file_id}")
60
54
  print(f" - 结果 URL: {result.result_url}")
61
- print("\n 💡 提示: 需要下载 result_url 来获取解析结果")
55
+ print("\n 💡 提示: 异步任务只返回 result_url,需要另外下载来获取解析结果")
56
+ print(" result_url 是一个可以下载的 JSON 文件 URL")
62
57
  elif result.is_failed:
63
58
  print(f"\n❌ 任务失败: {result.error_message}")
64
59
  else:
@@ -70,9 +65,9 @@ def example_1_create_and_wait():
70
65
 
71
66
  def example_2_manual_polling():
72
67
  """示例 2.2: 手动轮询任务状态"""
73
- print("\n" + "="*60)
68
+ print("\n" + "=" * 60)
74
69
  print("示例 2.2: 手动轮询任务状态")
75
- print("="*60)
70
+ print("=" * 60)
76
71
 
77
72
  client = XParseClient.from_env()
78
73
 
@@ -84,9 +79,7 @@ def example_2_manual_polling():
84
79
  # 创建异步任务
85
80
  with open(test_file, "rb") as f:
86
81
  job = client.parse.create_async_job(
87
- file=f,
88
- filename=test_file.name,
89
- config=ParseConfig(provider="textin")
82
+ file=f, filename=test_file.name, config=ParseConfig(provider="textin")
90
83
  )
91
84
 
92
85
  print(f"✅ 任务已创建: {job.job_id}")
@@ -128,9 +121,9 @@ def example_2_manual_polling():
128
121
 
129
122
  def example_3_error_handling():
130
123
  """示例 2.3: 异步任务错误处理"""
131
- print("\n" + "="*60)
124
+ print("\n" + "=" * 60)
132
125
  print("示例 2.3: 异步任务错误处理")
133
- print("="*60)
126
+ print("=" * 60)
134
127
 
135
128
  client = XParseClient.from_env()
136
129
 
@@ -142,9 +135,7 @@ def example_3_error_handling():
142
135
  print("\n📤 创建任务并设置很短的超时时间...")
143
136
  with open(test_file, "rb") as f:
144
137
  job = client.parse.create_async_job(
145
- file=f,
146
- filename=test_file.name,
147
- config=ParseConfig(provider="textin")
138
+ file=f, filename=test_file.name, config=ParseConfig(provider="textin")
148
139
  )
149
140
 
150
141
  print(f"✅ 任务已创建: {job.job_id}")
@@ -153,8 +144,8 @@ def example_3_error_handling():
153
144
  try:
154
145
  result = client.parse.wait_for_result(
155
146
  job_id=job.job_id,
156
- timeout_seconds=0.5, # 很短的超时
157
- poll_interval_seconds=0.2
147
+ timeout=0.5, # 很短的超时
148
+ poll_interval=0.2,
158
149
  )
159
150
 
160
151
  if result.is_completed:
@@ -173,9 +164,9 @@ def example_3_error_handling():
173
164
 
174
165
  def main():
175
166
  """主函数"""
176
- print("\n" + "="*60)
167
+ print("\n" + "=" * 60)
177
168
  print("xparse-client 服务端异步任务示例")
178
- print("="*60)
169
+ print("=" * 60)
179
170
 
180
171
  # 检查环境变量
181
172
  if not os.getenv("TEXTIN_APP_ID") or not os.getenv("TEXTIN_SECRET_CODE"):
@@ -196,13 +187,14 @@ def main():
196
187
  example_2_manual_polling()
197
188
  example_3_error_handling()
198
189
 
199
- print("\n" + "="*60)
190
+ print("\n" + "=" * 60)
200
191
  print("✅ 所有示例运行完成!")
201
- print("="*60)
192
+ print("=" * 60)
202
193
 
203
194
  except Exception as e:
204
195
  print(f"\n❌ 错误: {e}")
205
196
  import traceback
197
+
206
198
  traceback.print_exc()
207
199
 
208
200
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xparse-client"
7
- version = "0.3.0b8"
7
+ version = "0.3.0b10"
8
8
  description = "面向 Agent 和 RAG 的文档处理 Pipeline 客户端"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -59,10 +59,13 @@ class TestParseAPI:
59
59
  def test_create_async_job(self, client, sample_pdf_bytes):
60
60
  """测试创建异步任务"""
61
61
  respx.post(f"{client.config.server_url}/api/xparse/parse/async").mock(
62
- return_value=httpx.Response(200, json={
63
- "code": 200,
64
- "data": {"job_id": "job_123"},
65
- })
62
+ return_value=httpx.Response(
63
+ 200,
64
+ json={
65
+ "code": 200,
66
+ "data": {"job_id": "job_123"},
67
+ },
68
+ )
66
69
  )
67
70
 
68
71
  result = client.parse.create_async_job(
@@ -76,10 +79,13 @@ class TestParseAPI:
76
79
  def test_create_async_job_with_config(self, client, sample_pdf_bytes):
77
80
  """测试创建异步任务(带配置)"""
78
81
  respx.post(f"{client.config.server_url}/api/xparse/parse/async").mock(
79
- return_value=httpx.Response(200, json={
80
- "code": 200,
81
- "data": {"job_id": "job_456"},
82
- })
82
+ return_value=httpx.Response(
83
+ 200,
84
+ json={
85
+ "code": 200,
86
+ "data": {"job_id": "job_456"},
87
+ },
88
+ )
83
89
  )
84
90
 
85
91
  result = client.parse.create_async_job(
@@ -94,10 +100,13 @@ class TestParseAPI:
94
100
  def test_create_async_job_with_webhook(self, client, sample_pdf_bytes):
95
101
  """测试创建异步任务(带 webhook)"""
96
102
  respx.post(f"{client.config.server_url}/api/xparse/parse/async").mock(
97
- return_value=httpx.Response(200, json={
98
- "code": 200,
99
- "data": {"job_id": "job_789"},
100
- })
103
+ return_value=httpx.Response(
104
+ 200,
105
+ json={
106
+ "code": 200,
107
+ "data": {"job_id": "job_789"},
108
+ },
109
+ )
101
110
  )
102
111
 
103
112
  result = client.parse.create_async_job(
@@ -112,18 +121,19 @@ class TestParseAPI:
112
121
  def test_get_result_completed(self, client):
113
122
  """测试获取已完成的任务结果"""
114
123
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
115
- return_value=httpx.Response(200, json={
116
- "code": 200,
117
- "data": {
118
- "job_id": "job_123",
119
- "file_id": "file_456",
120
- "status": "completed",
121
- "result_url": "https://example.com/result.json",
122
- "elements": [
123
- {"element_id": "elem_001", "type": "text", "text": "test"}
124
- ],
124
+ return_value=httpx.Response(
125
+ 200,
126
+ json={
127
+ "code": 200,
128
+ "data": {
129
+ "job_id": "job_123",
130
+ "file_id": "file_456",
131
+ "status": "completed",
132
+ "result_url": "https://example.com/result.json",
133
+ "elements": [{"element_id": "elem_001", "type": "text", "text": "test"}],
134
+ },
125
135
  },
126
- })
136
+ )
127
137
  )
128
138
 
129
139
  result = client.parse.get_result(job_id="job_123")
@@ -141,13 +151,16 @@ class TestParseAPI:
141
151
  def test_get_result_in_progress(self, client):
142
152
  """测试获取进行中的任务"""
143
153
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
144
- return_value=httpx.Response(200, json={
145
- "code": 200,
146
- "data": {
147
- "job_id": "job_123",
148
- "status": "in_progress",
154
+ return_value=httpx.Response(
155
+ 200,
156
+ json={
157
+ "code": 200,
158
+ "data": {
159
+ "job_id": "job_123",
160
+ "status": "in_progress",
161
+ },
149
162
  },
150
- })
163
+ )
151
164
  )
152
165
 
153
166
  result = client.parse.get_result(job_id="job_123")
@@ -161,13 +174,16 @@ class TestParseAPI:
161
174
  def test_get_result_scheduled(self, client):
162
175
  """测试获取已调度但未开始的任务"""
163
176
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
164
- return_value=httpx.Response(200, json={
165
- "code": 200,
166
- "data": {
167
- "job_id": "job_123",
168
- "status": "scheduled",
177
+ return_value=httpx.Response(
178
+ 200,
179
+ json={
180
+ "code": 200,
181
+ "data": {
182
+ "job_id": "job_123",
183
+ "status": "scheduled",
184
+ },
169
185
  },
170
- })
186
+ )
171
187
  )
172
188
 
173
189
  result = client.parse.get_result(job_id="job_123")
@@ -180,14 +196,17 @@ class TestParseAPI:
180
196
  def test_get_result_failed(self, client):
181
197
  """测试获取失败的任务"""
182
198
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
183
- return_value=httpx.Response(200, json={
184
- "code": 200,
185
- "data": {
186
- "job_id": "job_123",
187
- "status": "failed",
188
- "error_message": "处理失败",
199
+ return_value=httpx.Response(
200
+ 200,
201
+ json={
202
+ "code": 200,
203
+ "data": {
204
+ "job_id": "job_123",
205
+ "status": "failed",
206
+ "error_message": "处理失败",
207
+ },
189
208
  },
190
- })
209
+ )
191
210
  )
192
211
 
193
212
  result = client.parse.get_result(job_id="job_123")
@@ -198,35 +217,41 @@ class TestParseAPI:
198
217
  assert result.error_message == "处理失败"
199
218
 
200
219
  @respx.mock
201
- @patch('time.sleep')
220
+ @patch("time.sleep")
202
221
  def test_wait_for_result_success(self, mock_sleep, client):
203
222
  """测试等待任务完成(成功)"""
204
223
  # 第一次查询: 进行中
205
224
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
206
225
  side_effect=[
207
- httpx.Response(200, json={
208
- "code": 200,
209
- "data": {
210
- "job_id": "job_123",
211
- "status": "in_progress",
226
+ httpx.Response(
227
+ 200,
228
+ json={
229
+ "code": 200,
230
+ "data": {
231
+ "job_id": "job_123",
232
+ "status": "in_progress",
233
+ },
212
234
  },
213
- }),
214
- httpx.Response(200, json={
215
- "code": 200,
216
- "data": {
217
- "job_id": "job_123",
218
- "status": "completed",
219
- "elements": [
220
- {"element_id": "elem_001", "type": "text", "text": "test"}
221
- ],
235
+ ),
236
+ httpx.Response(
237
+ 200,
238
+ json={
239
+ "code": 200,
240
+ "data": {
241
+ "job_id": "job_123",
242
+ "status": "completed",
243
+ "elements": [
244
+ {"element_id": "elem_001", "type": "text", "text": "test"}
245
+ ],
246
+ },
222
247
  },
223
- }),
248
+ ),
224
249
  ]
225
250
  )
226
251
 
227
252
  result = client.parse.wait_for_result(
228
253
  job_id="job_123",
229
- poll_interval_seconds=1,
254
+ poll_interval=1,
230
255
  )
231
256
 
232
257
  assert result.is_completed
@@ -236,18 +261,21 @@ class TestParseAPI:
236
261
  mock_sleep.assert_called_with(1)
237
262
 
238
263
  @respx.mock
239
- @patch('time.sleep')
264
+ @patch("time.sleep")
240
265
  def test_wait_for_result_immediate_completion(self, mock_sleep, client):
241
266
  """测试等待任务完成(立即完成)"""
242
267
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
243
- return_value=httpx.Response(200, json={
244
- "code": 200,
245
- "data": {
246
- "job_id": "job_123",
247
- "status": "completed",
248
- "elements": [],
268
+ return_value=httpx.Response(
269
+ 200,
270
+ json={
271
+ "code": 200,
272
+ "data": {
273
+ "job_id": "job_123",
274
+ "status": "completed",
275
+ "elements": [],
276
+ },
249
277
  },
250
- })
278
+ )
251
279
  )
252
280
 
253
281
  result = client.parse.wait_for_result(job_id="job_123")
@@ -257,18 +285,21 @@ class TestParseAPI:
257
285
  assert mock_sleep.call_count == 0
258
286
 
259
287
  @respx.mock
260
- @patch('time.sleep')
288
+ @patch("time.sleep")
261
289
  def test_wait_for_result_task_failed(self, mock_sleep, client):
262
290
  """测试等待任务完成(任务失败)"""
263
291
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
264
- return_value=httpx.Response(200, json={
265
- "code": 200,
266
- "data": {
267
- "job_id": "job_123",
268
- "status": "failed",
269
- "error_message": "文件格式不支持",
292
+ return_value=httpx.Response(
293
+ 200,
294
+ json={
295
+ "code": 200,
296
+ "data": {
297
+ "job_id": "job_123",
298
+ "status": "failed",
299
+ "error_message": "文件格式不支持",
300
+ },
270
301
  },
271
- })
302
+ )
272
303
  )
273
304
 
274
305
  with pytest.raises(APIError) as exc_info:
@@ -278,62 +309,74 @@ class TestParseAPI:
278
309
  assert "文件格式不支持" in str(exc_info.value)
279
310
 
280
311
  @respx.mock
281
- @patch('time.time')
282
- @patch('time.sleep')
312
+ @patch("time.time")
313
+ @patch("time.sleep")
283
314
  def test_wait_for_result_timeout(self, mock_sleep, mock_time, client):
284
315
  """测试等待任务超时"""
285
316
  # Mock time.time() 返回递增的时间
286
317
  mock_time.side_effect = [0, 5, 11] # 第一次0秒,第二次5秒,第三次11秒
287
318
 
288
319
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
289
- return_value=httpx.Response(200, json={
290
- "code": 200,
291
- "data": {
292
- "job_id": "job_123",
293
- "status": "in_progress",
320
+ return_value=httpx.Response(
321
+ 200,
322
+ json={
323
+ "code": 200,
324
+ "data": {
325
+ "job_id": "job_123",
326
+ "status": "in_progress",
327
+ },
294
328
  },
295
- })
329
+ )
296
330
  )
297
331
 
298
332
  with pytest.raises(RequestTimeoutError) as exc_info:
299
333
  client.parse.wait_for_result(
300
334
  job_id="job_123",
301
- timeout_seconds=10,
302
- poll_interval_seconds=5,
335
+ timeout=10,
336
+ poll_interval=5,
303
337
  )
304
338
 
305
339
  assert "等待解析任务超时" in str(exc_info.value)
306
340
  assert "job_123" in str(exc_info.value)
307
341
 
308
342
  @respx.mock
309
- @patch('time.sleep')
343
+ @patch("time.sleep")
310
344
  def test_wait_for_result_multiple_polls(self, mock_sleep, client):
311
345
  """测试多次轮询后完成"""
312
346
  # 模拟 3 次轮询: scheduled -> in_progress -> completed
313
347
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
314
348
  side_effect=[
315
- httpx.Response(200, json={
316
- "code": 200,
317
- "data": {"job_id": "job_123", "status": "scheduled"},
318
- }),
319
- httpx.Response(200, json={
320
- "code": 200,
321
- "data": {"job_id": "job_123", "status": "in_progress"},
322
- }),
323
- httpx.Response(200, json={
324
- "code": 200,
325
- "data": {
326
- "job_id": "job_123",
327
- "status": "completed",
328
- "elements": [],
349
+ httpx.Response(
350
+ 200,
351
+ json={
352
+ "code": 200,
353
+ "data": {"job_id": "job_123", "status": "scheduled"},
354
+ },
355
+ ),
356
+ httpx.Response(
357
+ 200,
358
+ json={
359
+ "code": 200,
360
+ "data": {"job_id": "job_123", "status": "in_progress"},
329
361
  },
330
- }),
362
+ ),
363
+ httpx.Response(
364
+ 200,
365
+ json={
366
+ "code": 200,
367
+ "data": {
368
+ "job_id": "job_123",
369
+ "status": "completed",
370
+ "elements": [],
371
+ },
372
+ },
373
+ ),
331
374
  ]
332
375
  )
333
376
 
334
377
  result = client.parse.wait_for_result(
335
378
  job_id="job_123",
336
- poll_interval_seconds=2,
379
+ poll_interval=2,
337
380
  )
338
381
 
339
382
  assert result.is_completed
@@ -344,31 +387,33 @@ class TestParseAPI:
344
387
  def test_wait_for_result_custom_intervals(self, client):
345
388
  """测试自定义轮询参数"""
346
389
  respx.get(f"{client.config.server_url}/api/xparse/parse/async/job_123").mock(
347
- return_value=httpx.Response(200, json={
348
- "code": 200,
349
- "data": {
350
- "job_id": "job_123",
351
- "status": "completed",
352
- "elements": [],
390
+ return_value=httpx.Response(
391
+ 200,
392
+ json={
393
+ "code": 200,
394
+ "data": {
395
+ "job_id": "job_123",
396
+ "status": "completed",
397
+ "elements": [],
398
+ },
353
399
  },
354
- })
400
+ )
355
401
  )
356
402
 
357
403
  result = client.parse.wait_for_result(
358
404
  job_id="job_123",
359
- timeout_seconds=300,
360
- poll_interval_seconds=10,
405
+ timeout=300,
406
+ poll_interval=10,
361
407
  )
362
408
 
363
409
  assert result.is_completed
364
410
 
365
411
  def test_partition_async_removed(self, client):
366
412
  """测试 partition_async 方法已被移除"""
367
- assert not hasattr(client.parse, 'partition_async')
413
+ assert not hasattr(client.parse, "partition_async")
368
414
 
369
415
  def test_async_job_methods_still_exist(self, client):
370
416
  """测试服务端异步任务方法仍然存在"""
371
- assert hasattr(client.parse, 'create_async_job')
372
- assert hasattr(client.parse, 'get_result')
373
- assert hasattr(client.parse, 'wait_for_result')
374
-
417
+ assert hasattr(client.parse, "create_async_job")
418
+ assert hasattr(client.parse, "get_result")
419
+ assert hasattr(client.parse, "wait_for_result")
@@ -1,6 +1,5 @@
1
1
  """异常类测试"""
2
2
 
3
-
4
3
  from xparse_client.exceptions import (
5
4
  APIError,
6
5
  AuthenticationError,
@@ -137,9 +136,9 @@ class TestAPIError:
137
136
  """测试超时错误"""
138
137
  error = RequestTimeoutError(
139
138
  "请求超时",
140
- timeout_seconds=30.0,
139
+ timeout=30.0,
141
140
  )
142
- assert error.timeout_seconds == 30.0
141
+ assert error.timeout == 30.0
143
142
 
144
143
 
145
144
  class TestConnectorError:
@@ -291,7 +291,7 @@ class HTTPClient:
291
291
  continue
292
292
  raise RequestTimeoutError(
293
293
  f"请求超时: {path}",
294
- timeout_seconds=timeout or self.config.timeout,
294
+ timeout=timeout or self.config.timeout,
295
295
  ) from e
296
296
 
297
297
  except httpx.RequestError as e:
@@ -86,7 +86,6 @@ class Parse(BaseAPI):
86
86
  response = self._post("/parse/sync", files=files, data=data)
87
87
  return self._parse_response(response, ParseResponse)
88
88
 
89
-
90
89
  def create_async_job(
91
90
  self,
92
91
  *,
@@ -127,7 +126,6 @@ class Parse(BaseAPI):
127
126
  response = self._post("/parse/async", files=files, data=data)
128
127
  return self._parse_response(response, AsyncJobResponse)
129
128
 
130
-
131
129
  def get_result(self, *, job_id: str) -> JobStatusResponse:
132
130
  """获取异步解析结果
133
131
 
@@ -149,13 +147,12 @@ class Parse(BaseAPI):
149
147
  response = self._get(f"/parse/async/{job_id}")
150
148
  return self._parse_response(response, JobStatusResponse)
151
149
 
152
-
153
150
  def wait_for_result(
154
151
  self,
155
152
  *,
156
153
  job_id: str,
157
- timeout_seconds: float = 3600,
158
- poll_interval_seconds: float = 5,
154
+ timeout: float = 3600,
155
+ poll_interval: float = 5,
159
156
  ) -> JobStatusResponse:
160
157
  """等待异步任务完成
161
158
 
@@ -163,8 +160,8 @@ class Parse(BaseAPI):
163
160
 
164
161
  Args:
165
162
  job_id: 任务 ID
166
- timeout_seconds: 超时时间(秒),默认 3600
167
- poll_interval_seconds: 轮询间隔(秒),默认 5
163
+ timeout: 超时时间(秒),默认 3600
164
+ poll_interval: 轮询间隔(秒),默认 5
168
165
 
169
166
  Returns:
170
167
  JobStatusResponse: 完成的任务结果
@@ -177,8 +174,8 @@ class Parse(BaseAPI):
177
174
  >>> job = client.parse.create_async_job(file=file_bytes, filename="doc.pdf")
178
175
  >>> result = client.parse.wait_for_result(
179
176
  ... job_id=job.job_id,
180
- ... timeout_seconds=600,
181
- ... poll_interval_seconds=10
177
+ ... timeout=600,
178
+ ... poll_interval=10
182
179
  ... )
183
180
  """
184
181
  start_time = time.time()
@@ -196,14 +193,13 @@ class Parse(BaseAPI):
196
193
  )
197
194
 
198
195
  elapsed = time.time() - start_time
199
- if elapsed > timeout_seconds:
196
+ if elapsed > timeout:
200
197
  raise RequestTimeoutError(
201
198
  f"等待解析任务超时: {job_id}",
202
- timeout_seconds=timeout_seconds,
199
+ timeout=timeout,
203
200
  )
204
201
 
205
- time.sleep(poll_interval_seconds)
206
-
202
+ time.sleep(poll_interval)
207
203
 
208
204
 
209
205
  __all__ = ["Parse"]
@@ -248,21 +248,21 @@ class RequestTimeoutError(APIError):
248
248
  请求超时时抛出。
249
249
 
250
250
  Attributes:
251
- timeout_seconds: 超时时间设置
251
+ timeout: 超时时间设置
252
252
  """
253
253
 
254
254
  def __init__(
255
255
  self,
256
256
  message: str,
257
257
  *,
258
- timeout_seconds: float | None = None,
258
+ timeout: float | None = None,
259
259
  **kwargs: Any,
260
260
  ) -> None:
261
- self.timeout_seconds = timeout_seconds
261
+ self.timeout = timeout
262
262
 
263
263
  details = kwargs.pop("details", {}) or {}
264
- if timeout_seconds:
265
- details["timeout_seconds"] = timeout_seconds
264
+ if timeout:
265
+ details["timeout_seconds"] = timeout
266
266
 
267
267
  super().__init__(message, details=details, **kwargs)
268
268
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b8
3
+ Version: 0.3.0b10
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -223,6 +223,7 @@ with open("document.pdf", "rb") as f:
223
223
  extract_config=ExtractConfig(schema=schema)
224
224
  )
225
225
 
226
+ # Extract API 返回的结构化数据在 result.result 中
226
227
  print(result.result)
227
228
  ```
228
229
 
@@ -277,22 +278,11 @@ result = client.parse.wait_for_result(
277
278
  poll_interval=5.0
278
279
  )
279
280
 
280
- print(f"任务完成,解析出 {len(result.result.elements)} 个元素")
281
- ```
282
-
283
- ### 同步 vs 异步
284
-
285
- ```python
286
- # 同步调用(适合小文件)
287
- result = client.parse.partition(file=f, filename="doc.pdf")
288
-
289
- # 异步调用(相同接口,加上 _async 后缀)
290
- import asyncio
291
-
292
- async def main():
293
- result = await client.parse.partition_async(file=f, filename="doc.pdf")
294
-
295
- asyncio.run(main())
281
+ if result.is_completed:
282
+ print(f"任务完成,结果 URL: {result.result_url}")
283
+ print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
284
+ elif result.is_failed:
285
+ print(f"任务失败: {result.error_message}")
296
286
  ```
297
287
 
298
288
  ---
@@ -326,7 +316,17 @@ result = client.pipeline.execute(
326
316
 
327
317
  ### 📊 详细的处理统计
328
318
 
319
+ Pipeline API 返回详细的处理统计信息:
320
+
329
321
  ```python
322
+ # 使用 Pipeline API 时可以获取统计信息
323
+ result = client.pipeline.execute(
324
+ file=f,
325
+ filename="document.pdf",
326
+ stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
327
+ )
328
+
329
+ # 访问统计信息
330
330
  print(f"原始元素: {result.stats.original_elements}")
331
331
  print(f"分块后: {result.stats.chunked_elements}")
332
332
  print(f"向量化: {result.stats.embedded_elements}")
@@ -750,11 +750,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
750
750
 
751
751
  # 方式 1:自动等待完成
752
752
  result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
753
+ if result.is_completed:
754
+ print(f"任务完成,结果 URL: {result.result_url}")
755
+ # 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
753
756
 
754
757
  # 方式 2:手动轮询
755
758
  while True:
756
759
  status = client.parse.get_result(job_id=job.job_id)
757
- if status.status == "completed":
760
+ if status.is_completed:
761
+ print(f"结果 URL: {status.result_url}")
758
762
  break
759
763
  time.sleep(5)
760
764
  ```