xparse-client 0.3.0b9__tar.gz → 0.3.0b11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {xparse_client-0.3.0b9/xparse_client.egg-info → xparse_client-0.3.0b11}/PKG-INFO +22 -18
  2. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/README.md +21 -17
  3. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/example/1_basic_api_usage.py +3 -3
  4. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/example/2_async_job.py +2 -1
  5. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/pyproject.toml +1 -1
  6. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/api/extract.py +1 -1
  7. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11/xparse_client.egg-info}/PKG-INFO +22 -18
  8. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/CHANGELOG.md +0 -0
  9. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/LICENSE +0 -0
  10. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/MANIFEST.in +0 -0
  11. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/example/3_local_workflow.py +0 -0
  12. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/example/4_advanced_workflow.py +0 -0
  13. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/example/README.md +0 -0
  14. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/example/config_example.json +0 -0
  15. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/setup.cfg +0 -0
  16. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/conftest.py +0 -0
  17. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/__init__.py +0 -0
  18. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/api/__init__.py +0 -0
  19. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/api/test_extract.py +0 -0
  20. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/api/test_local.py +0 -0
  21. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/api/test_parse.py +0 -0
  22. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/api/test_pipeline.py +0 -0
  23. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/api/test_workflows.py +0 -0
  24. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_ftp.py +0 -0
  25. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_local_connectors.py +0 -0
  26. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_milvus.py +0 -0
  27. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_qdrant.py +0 -0
  28. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_s3.py +0 -0
  29. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_smb.py +0 -0
  30. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/connectors/test_utils.py +0 -0
  31. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/models/test_local.py +0 -0
  32. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/models/test_pipeline_stages.py +0 -0
  33. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/models/test_workflows.py +0 -0
  34. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/test_base.py +0 -0
  35. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/test_client.py +0 -0
  36. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/test_config.py +0 -0
  37. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/test_exceptions.py +0 -0
  38. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/tests/unit/test_http.py +0 -0
  39. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/__init__.py +0 -0
  40. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/_base.py +0 -0
  41. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/_client.py +0 -0
  42. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/_config.py +0 -0
  43. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/_http.py +0 -0
  44. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/api/__init__.py +0 -0
  45. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/api/local.py +0 -0
  46. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/api/parse.py +0 -0
  47. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/api/pipeline.py +0 -0
  48. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/api/workflows.py +0 -0
  49. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/__init__.py +0 -0
  50. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/_utils.py +0 -0
  51. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/destinations/__init__.py +0 -0
  52. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/destinations/base.py +0 -0
  53. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/destinations/local.py +0 -0
  54. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/destinations/milvus.py +0 -0
  55. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/destinations/qdrant.py +0 -0
  56. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/destinations/s3.py +0 -0
  57. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/sources/__init__.py +0 -0
  58. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/sources/base.py +0 -0
  59. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/sources/ftp.py +0 -0
  60. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/sources/local.py +0 -0
  61. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/sources/s3.py +0 -0
  62. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/connectors/sources/smb.py +0 -0
  63. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/exceptions.py +0 -0
  64. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/__init__.py +0 -0
  65. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/chunk.py +0 -0
  66. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/embed.py +0 -0
  67. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/extract.py +0 -0
  68. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/local.py +0 -0
  69. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/parse.py +0 -0
  70. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/pipeline.py +0 -0
  71. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client/models/workflows.py +0 -0
  72. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client.egg-info/SOURCES.txt +0 -0
  73. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client.egg-info/dependency_links.txt +0 -0
  74. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client.egg-info/requires.txt +0 -0
  75. {xparse_client-0.3.0b9 → xparse_client-0.3.0b11}/xparse_client.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b9
3
+ Version: 0.3.0b11
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -223,6 +223,7 @@ with open("document.pdf", "rb") as f:
223
223
  extract_config=ExtractConfig(schema=schema)
224
224
  )
225
225
 
226
+ # Extract API 返回的结构化数据在 result.result 中
226
227
  print(result.result)
227
228
  ```
228
229
 
@@ -277,22 +278,11 @@ result = client.parse.wait_for_result(
277
278
  poll_interval=5.0
278
279
  )
279
280
 
280
- print(f"任务完成,解析出 {len(result.result.elements)} 个元素")
281
- ```
282
-
283
- ### 同步 vs 异步
284
-
285
- ```python
286
- # 同步调用(适合小文件)
287
- result = client.parse.partition(file=f, filename="doc.pdf")
288
-
289
- # 异步调用(相同接口,加上 _async 后缀)
290
- import asyncio
291
-
292
- async def main():
293
- result = await client.parse.partition_async(file=f, filename="doc.pdf")
294
-
295
- asyncio.run(main())
281
+ if result.is_completed:
282
+ print(f"任务完成,结果 URL: {result.result_url}")
283
+ print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
284
+ elif result.is_failed:
285
+ print(f"任务失败: {result.error_message}")
296
286
  ```
297
287
 
298
288
  ---
@@ -326,7 +316,17 @@ result = client.pipeline.execute(
326
316
 
327
317
  ### 📊 详细的处理统计
328
318
 
319
+ Pipeline API 返回详细的处理统计信息:
320
+
329
321
  ```python
322
+ # 使用 Pipeline API 时可以获取统计信息
323
+ result = client.pipeline.execute(
324
+ file=f,
325
+ filename="document.pdf",
326
+ stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
327
+ )
328
+
329
+ # 访问统计信息
330
330
  print(f"原始元素: {result.stats.original_elements}")
331
331
  print(f"分块后: {result.stats.chunked_elements}")
332
332
  print(f"向量化: {result.stats.embedded_elements}")
@@ -750,11 +750,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
750
750
 
751
751
  # 方式 1:自动等待完成
752
752
  result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
753
+ if result.is_completed:
754
+ print(f"任务完成,结果 URL: {result.result_url}")
755
+ # 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
753
756
 
754
757
  # 方式 2:手动轮询
755
758
  while True:
756
759
  status = client.parse.get_result(job_id=job.job_id)
757
- if status.status == "completed":
760
+ if status.is_completed:
761
+ print(f"结果 URL: {status.result_url}")
758
762
  break
759
763
  time.sleep(5)
760
764
  ```
@@ -174,6 +174,7 @@ with open("document.pdf", "rb") as f:
174
174
  extract_config=ExtractConfig(schema=schema)
175
175
  )
176
176
 
177
+ # Extract API 返回的结构化数据在 result.result 中
177
178
  print(result.result)
178
179
  ```
179
180
 
@@ -228,22 +229,11 @@ result = client.parse.wait_for_result(
228
229
  poll_interval=5.0
229
230
  )
230
231
 
231
- print(f"任务完成,解析出 {len(result.result.elements)} 个元素")
232
- ```
233
-
234
- ### 同步 vs 异步
235
-
236
- ```python
237
- # 同步调用(适合小文件)
238
- result = client.parse.partition(file=f, filename="doc.pdf")
239
-
240
- # 异步调用(相同接口,加上 _async 后缀)
241
- import asyncio
242
-
243
- async def main():
244
- result = await client.parse.partition_async(file=f, filename="doc.pdf")
245
-
246
- asyncio.run(main())
232
+ if result.is_completed:
233
+ print(f"任务完成,结果 URL: {result.result_url}")
234
+ print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
235
+ elif result.is_failed:
236
+ print(f"任务失败: {result.error_message}")
247
237
  ```
248
238
 
249
239
  ---
@@ -277,7 +267,17 @@ result = client.pipeline.execute(
277
267
 
278
268
  ### 📊 详细的处理统计
279
269
 
270
+ Pipeline API 返回详细的处理统计信息:
271
+
280
272
  ```python
273
+ # 使用 Pipeline API 时可以获取统计信息
274
+ result = client.pipeline.execute(
275
+ file=f,
276
+ filename="document.pdf",
277
+ stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
278
+ )
279
+
280
+ # 访问统计信息
281
281
  print(f"原始元素: {result.stats.original_elements}")
282
282
  print(f"分块后: {result.stats.chunked_elements}")
283
283
  print(f"向量化: {result.stats.embedded_elements}")
@@ -701,11 +701,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
701
701
 
702
702
  # 方式 1:自动等待完成
703
703
  result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
704
+ if result.is_completed:
705
+ print(f"任务完成,结果 URL: {result.result_url}")
706
+ # 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
704
707
 
705
708
  # 方式 2:手动轮询
706
709
  while True:
707
710
  status = client.parse.get_result(job_id=job.job_id)
708
- if status.status == "completed":
711
+ if status.is_completed:
712
+ print(f"结果 URL: {status.result_url}")
709
713
  break
710
714
  time.sleep(5)
711
715
  ```
@@ -95,9 +95,9 @@ def example_2_extract_structured_data():
95
95
  )
96
96
 
97
97
  print("\n✅ 提取成功!")
98
- # Extract API 返回的数据在 extract_result.extracted_schema
99
- if result.extract_result and "extracted_schema" in result.extract_result:
100
- print(f" 提取的数据: {result.extract_result['extracted_schema']}")
98
+ # Extract API 返回的数据在 result.result
99
+ if result.result:
100
+ print(f" 提取的数据: {result.result}")
101
101
  else:
102
102
  print(" 未提取到数据")
103
103
 
@@ -52,7 +52,8 @@ def example_1_create_and_wait():
52
52
  print(f" - 任务 ID: {result.job_id}")
53
53
  print(f" - 文件 ID: {result.file_id}")
54
54
  print(f" - 结果 URL: {result.result_url}")
55
- print("\n 💡 提示: 需要下载 result_url 来获取解析结果")
55
+ print("\n 💡 提示: 异步任务只返回 result_url,需要另外下载来获取解析结果")
56
+ print(" result_url 是一个可以下载的 JSON 文件 URL")
56
57
  elif result.is_failed:
57
58
  print(f"\n❌ 任务失败: {result.error_message}")
58
59
  else:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xparse-client"
7
- version = "0.3.0b9"
7
+ version = "0.3.0b11"
8
8
  description = "面向 Agent 和 RAG 的文档处理 Pipeline 客户端"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -96,7 +96,7 @@ class Extract(BaseAPI):
96
96
  # 处理 extract_config
97
97
  if extract_config:
98
98
  data["extract_config"] = json.dumps(
99
- extract_config.model_dump(), ensure_ascii=False
99
+ extract_config.model_dump(by_alias=True), ensure_ascii=False
100
100
  )
101
101
  else:
102
102
  raise ValueError("extract_config is required")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b9
3
+ Version: 0.3.0b11
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -223,6 +223,7 @@ with open("document.pdf", "rb") as f:
223
223
  extract_config=ExtractConfig(schema=schema)
224
224
  )
225
225
 
226
+ # Extract API 返回的结构化数据在 result.result 中
226
227
  print(result.result)
227
228
  ```
228
229
 
@@ -277,22 +278,11 @@ result = client.parse.wait_for_result(
277
278
  poll_interval=5.0
278
279
  )
279
280
 
280
- print(f"任务完成,解析出 {len(result.result.elements)} 个元素")
281
- ```
282
-
283
- ### 同步 vs 异步
284
-
285
- ```python
286
- # 同步调用(适合小文件)
287
- result = client.parse.partition(file=f, filename="doc.pdf")
288
-
289
- # 异步调用(相同接口,加上 _async 后缀)
290
- import asyncio
291
-
292
- async def main():
293
- result = await client.parse.partition_async(file=f, filename="doc.pdf")
294
-
295
- asyncio.run(main())
281
+ if result.is_completed:
282
+ print(f"任务完成,结果 URL: {result.result_url}")
283
+ print("💡 异步任务返回的是 result_url,需要另外下载来获取解析结果")
284
+ elif result.is_failed:
285
+ print(f"任务失败: {result.error_message}")
296
286
  ```
297
287
 
298
288
  ---
@@ -326,7 +316,17 @@ result = client.pipeline.execute(
326
316
 
327
317
  ### 📊 详细的处理统计
328
318
 
319
+ Pipeline API 返回详细的处理统计信息:
320
+
329
321
  ```python
322
+ # 使用 Pipeline API 时可以获取统计信息
323
+ result = client.pipeline.execute(
324
+ file=f,
325
+ filename="document.pdf",
326
+ stages=[ParseStage(...), ChunkStage(...), EmbedStage(...)]
327
+ )
328
+
329
+ # 访问统计信息
330
330
  print(f"原始元素: {result.stats.original_elements}")
331
331
  print(f"分块后: {result.stats.chunked_elements}")
332
332
  print(f"向量化: {result.stats.embedded_elements}")
@@ -750,11 +750,15 @@ job = client.parse.create_async_job(file=f, filename="large.pdf")
750
750
 
751
751
  # 方式 1:自动等待完成
752
752
  result = client.parse.wait_for_result(job_id=job.job_id, timeout=300)
753
+ if result.is_completed:
754
+ print(f"任务完成,结果 URL: {result.result_url}")
755
+ # 注意:异步任务返回的是 result_url,需要另外下载来获取解析结果
753
756
 
754
757
  # 方式 2:手动轮询
755
758
  while True:
756
759
  status = client.parse.get_result(job_id=job.job_id)
757
- if status.status == "completed":
760
+ if status.is_completed:
761
+ print(f"结果 URL: {status.result_url}")
758
762
  break
759
763
  time.sleep(5)
760
764
  ```