xparse-client 0.3.0b1__tar.gz → 0.3.0b6__tar.gz

This diff shows the changes between package versions as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (75)
  1. {xparse_client-0.3.0b1/xparse_client.egg-info → xparse_client-0.3.0b6}/PKG-INFO +4 -4
  2. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/README.md +3 -3
  3. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/example/2_async_job.py +1 -1
  4. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/example/4_advanced_workflow.py +3 -3
  5. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/pyproject.toml +1 -1
  6. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/models/test_pipeline_stages.py +2 -2
  7. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/_base.py +15 -6
  8. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/_http.py +10 -9
  9. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/api/extract.py +4 -8
  10. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/api/local.py +52 -12
  11. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/api/pipeline.py +3 -1
  12. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/destinations/qdrant.py +1 -1
  13. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/sources/ftp.py +2 -2
  14. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/sources/smb.py +1 -1
  15. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/extract.py +5 -3
  16. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/parse.py +2 -6
  17. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/pipeline.py +3 -1
  18. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6/xparse_client.egg-info}/PKG-INFO +4 -4
  19. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/CHANGELOG.md +0 -0
  20. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/LICENSE +0 -0
  21. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/MANIFEST.in +0 -0
  22. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/example/1_basic_api_usage.py +0 -0
  23. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/example/3_local_workflow.py +0 -0
  24. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/example/README.md +0 -0
  25. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/example/config_example.json +0 -0
  26. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/setup.cfg +0 -0
  27. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/conftest.py +0 -0
  28. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/__init__.py +0 -0
  29. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/api/__init__.py +0 -0
  30. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/api/test_extract.py +0 -0
  31. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/api/test_local.py +0 -0
  32. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/api/test_parse.py +0 -0
  33. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/api/test_pipeline.py +0 -0
  34. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/api/test_workflows.py +0 -0
  35. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_ftp.py +0 -0
  36. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_local_connectors.py +0 -0
  37. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_milvus.py +0 -0
  38. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_qdrant.py +0 -0
  39. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_s3.py +0 -0
  40. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_smb.py +0 -0
  41. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/connectors/test_utils.py +0 -0
  42. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/models/test_local.py +0 -0
  43. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/models/test_workflows.py +0 -0
  44. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/test_base.py +0 -0
  45. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/test_client.py +0 -0
  46. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/test_config.py +0 -0
  47. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/test_exceptions.py +0 -0
  48. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/tests/unit/test_http.py +0 -0
  49. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/__init__.py +0 -0
  50. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/_client.py +0 -0
  51. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/_config.py +0 -0
  52. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/api/__init__.py +0 -0
  53. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/api/parse.py +0 -0
  54. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/api/workflows.py +0 -0
  55. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/__init__.py +0 -0
  56. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/_utils.py +0 -0
  57. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/destinations/__init__.py +0 -0
  58. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/destinations/base.py +0 -0
  59. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/destinations/local.py +0 -0
  60. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/destinations/milvus.py +0 -0
  61. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/destinations/s3.py +0 -0
  62. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/sources/__init__.py +0 -0
  63. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/sources/base.py +0 -0
  64. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/sources/local.py +0 -0
  65. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/connectors/sources/s3.py +0 -0
  66. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/exceptions.py +0 -0
  67. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/__init__.py +0 -0
  68. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/chunk.py +0 -0
  69. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/embed.py +0 -0
  70. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/local.py +0 -0
  71. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client/models/workflows.py +0 -0
  72. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client.egg-info/SOURCES.txt +0 -0
  73. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client.egg-info/dependency_links.txt +0 -0
  74. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client.egg-info/requires.txt +0 -0
  75. {xparse_client-0.3.0b1 → xparse_client-0.3.0b6}/xparse_client.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: xparse-client
- Version: 0.3.0b1
+ Version: 0.3.0b6
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
  Author-email: INTSIG-TEXTIN <support@textin.com>
  License-Expression: MIT
@@ -213,17 +213,17 @@ schema = {
  "author": {"type": "string", "description": "作者"},
  "date": {"type": "string", "description": "日期"}
  },
- "required": ["title"]
+ "required": ["title", "author", "date"]
  }

  with open("document.pdf", "rb") as f:
  result = client.extract.extract(
  file=f,
  filename="document.pdf",
- config=ExtractConfig(schema=schema)
+ extract_config=ExtractConfig(schema=schema)
  )

- print(result.extracted_data)
+ print(result.result)
  ```

  ### 3. 本地批处理
@@ -164,17 +164,17 @@ schema = {
  "author": {"type": "string", "description": "作者"},
  "date": {"type": "string", "description": "日期"}
  },
- "required": ["title"]
+ "required": ["title", "author", "date"]
  }

  with open("document.pdf", "rb") as f:
  result = client.extract.extract(
  file=f,
  filename="document.pdf",
- config=ExtractConfig(schema=schema)
+ extract_config=ExtractConfig(schema=schema)
  )

- print(result.extracted_data)
+ print(result.result)
  ```

  ### 3. 本地批处理
@@ -165,7 +165,7 @@ def example_3_error_handling():

  except Exception as e:
  print(f"\n⏸️ 捕获到超时异常: {type(e).__name__}")
- print(f" 这是正常的,演示了如何处理超时情况")
+ print(" 这是正常的,演示了如何处理超时情况")

  finally:
  test_file.unlink(missing_ok=True)
@@ -91,12 +91,12 @@ def example_1_config_file():
  XParseClient.from_env()

  # 演示如何从配置构建 source 和 destination(不实际创建)
- print(f"\n✅ 配置加载成功!")
- print(f" - Source 配置:")
+ print("\n✅ 配置加载成功!")
+ print(" - Source 配置:")
  print(f" 目录: {loaded_config['source']['directory']}")
  print(f" 模式: {loaded_config['source']['pattern']}")
  print(f" 递归: {loaded_config['source']['recursive']}")
- print(f" - Destination 配置:")
+ print(" - Destination 配置:")
  print(f" 输出目录: {loaded_config['destination']['output_dir']}")

  # 构建 stages
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "xparse-client"
- version = "0.3.0b1"
+ version = "0.3.0b6"
  description = "面向 Agent 和 RAG 的文档处理 Pipeline 客户端"
  readme = "README.md"
  license = "MIT"
@@ -89,8 +89,8 @@ def test_extract_stage_with_config():
  assert stage.type == "extract"
  assert stage.config.generate_citations is True

- # 序列化检查
- dumped = stage.model_dump()
+ # 序列化检查(使用 by_alias=True 来确保 schema_ 序列化为 schema)
+ dumped = stage.model_dump(by_alias=True)
  assert "schema" in dumped["config"]
  assert dumped["config"]["schema"] == schema

@@ -6,7 +6,7 @@
  from __future__ import annotations

  from abc import ABC
- from typing import TYPE_CHECKING, Any, TypeVar
+ from typing import TYPE_CHECKING, Any, Protocol, TypeVar, cast

  import httpx

@@ -14,7 +14,16 @@ if TYPE_CHECKING:
  from ._config import SDKConfiguration
  from ._http import HTTPClient

- T = TypeVar("T")
+
+ class PydanticModel(Protocol):
+ """Pydantic 模型协议"""
+
+ @classmethod
+ def model_validate(cls, obj: Any) -> Any:
+ ...
+
+
+ T = TypeVar("T", bound=PydanticModel)


  class BaseAPI(ABC): # noqa: B024
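The two `_base.py` hunks above bound the generic type parameter to a small structural protocol so that the response-parsing helpers can return the concrete model type. A minimal standalone sketch of that pattern (the `parse_payload` helper and `Invoice` model below are illustrative, not part of the package):

```python
from typing import Any, Protocol, TypeVar, cast

from pydantic import BaseModel


class PydanticModel(Protocol):
    """Structural type: anything exposing Pydantic's model_validate classmethod."""

    @classmethod
    def model_validate(cls, obj: Any) -> Any: ...


T = TypeVar("T", bound=PydanticModel)


def parse_payload(model_class: type[T], payload: Any) -> T:
    # Unwrap the standard {"data": ...} envelope, then validate into the model.
    # cast() narrows model_validate's Any result back to the requested type.
    if isinstance(payload, dict) and "data" in payload:
        payload = payload["data"]
    return cast(T, model_class.model_validate(payload))


class Invoice(BaseModel):
    invoice_number: str
    total_amount: float


invoice = parse_payload(Invoice, {"data": {"invoice_number": "A-1", "total_amount": 9.5}})
```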
@@ -130,7 +139,7 @@ class BaseAPI(ABC): # noqa: B024
  if isinstance(data, dict) and "data" in data:
  data = data["data"]

- return model_class.model_validate(data)
+ return cast(T, model_class.model_validate(data))

  def _parse_list_response(
  self,
@@ -156,7 +165,7 @@ class BaseAPI(ABC): # noqa: B024
  if not isinstance(data, list):
  data = [data]

- return [model_class.model_validate(item) for item in data]
+ return [cast(T, model_class.model_validate(item)) for item in data]

  def _parse_raw_response(self, response: httpx.Response) -> dict[str, Any]:
  """解析原始 JSON 响应
@@ -171,9 +180,9 @@ class BaseAPI(ABC): # noqa: B024

  # 处理标准响应格式
  if isinstance(data, dict) and "data" in data:
- return data["data"]
+ return cast(dict[str, Any], data["data"])

- return data
+ return cast(dict[str, Any], data)


  __all__ = ["BaseAPI"]
@@ -53,7 +53,8 @@ def _extract_error_message(response: httpx.Response) -> str:
  return str(data[key])
  # 如果有嵌套的 error 对象
  if "error" in data and isinstance(data["error"], dict):
- return data["error"].get("message", str(data["error"]))
+ error_dict = data["error"]
+ return str(error_dict.get("message", error_dict))
  return str(data)
  except Exception:
  # 非 JSON 响应,返回文本内容
@@ -106,7 +107,7 @@ def raise_for_status(response: httpx.Response) -> None:
  message = _extract_error_message(response)
  response_body = response.text[:1000] if response.text else None

- common_kwargs = {
+ common_kwargs: dict[str, Any] = {
  "status_code": status_code,
  "request_id": request_id,
  "response_body": response_body,
@@ -140,24 +141,24 @@ def raise_for_status(response: httpx.Response) -> None:
  message = data.get("message", f"业务错误 code: {code}")
  response_body = response.text[:1000] if response.text else None

- common_kwargs = {
+ error_kwargs: dict[str, Any] = {
  "request_id": request_id,
  "response_body": response_body,
  }

  # 根据业务 code 映射到相应异常
  if code == 400:
- raise ValidationError(message, details=common_kwargs)
+ raise ValidationError(message, details=error_kwargs)
  elif code == 401:
- raise AuthenticationError(message, **common_kwargs)
+ raise AuthenticationError(message, **error_kwargs)
  elif code == 403:
- raise PermissionDeniedError(message, **common_kwargs)
+ raise PermissionDeniedError(message, **error_kwargs)
  elif code == 404:
- raise NotFoundError(message, **common_kwargs)
+ raise NotFoundError(message, **error_kwargs)
  elif code >= 500:
- raise ServerError(message, **common_kwargs)
+ raise ServerError(message, **error_kwargs)
  else:
- raise APIError(message, **common_kwargs)
+ raise APIError(message, **error_kwargs)
  except (ValueError, KeyError):
  # 如果无法解析 JSON 或没有 code 字段,认为是成功
  pass
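A hedged sketch of caller-side handling for the exception mapping above. The exception names and the extract call mirror this diff; the import paths for the client and exceptions are assumptions based on the package layout in the file list:

```python
# Usage sketch, not part of the diff: catching the exceptions that
# raise_for_status() maps HTTP status and business codes to.
from xparse_client import XParseClient  # assumed re-export from the package root
from xparse_client.exceptions import (  # assumed export location
    APIError,
    AuthenticationError,
    NotFoundError,
    ServerError,
    ValidationError,
)
from xparse_client.models.extract import ExtractConfig

client = XParseClient.from_env()
schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}

try:
    with open("document.pdf", "rb") as f:
        result = client.extract.extract(
            file=f,
            filename="document.pdf",
            extract_config=ExtractConfig(schema=schema),
        )
    print(result.result)
except ValidationError as e:      # business code 400
    print(f"invalid request: {e}")
except AuthenticationError as e:  # business code 401
    print(f"check the API key: {e}")
except NotFoundError as e:        # business code 404
    print(f"not found: {e}")
except ServerError as e:          # business code >= 500
    print(f"server-side error: {e}")
except APIError as e:             # any other business code
    print(f"API error: {e}")
```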
@@ -79,7 +79,8 @@ class Extract(BaseAPI):
  ... "properties": {
  ... "invoice_number": {"type": "string"},
  ... "total_amount": {"type": "number"}
- ... }
+ ... },
+ ... "required": ["invoice_number", "total_amount"]
  ... },
  ... generate_citations=True
  ... )
@@ -89,15 +90,11 @@ class Extract(BaseAPI):
  data = {}

  if parse_config:
- data["parse_config"] = json.dumps(
- parse_config.model_dump(), ensure_ascii=False
- )
+ data["parse_config"] = json.dumps(parse_config.model_dump(), ensure_ascii=False)

  # 处理 extract_config
  if extract_config:
- data["extract_config"] = json.dumps(
- extract_config.model_dump(), ensure_ascii=False
- )
+ data["extract_config"] = json.dumps(extract_config.model_dump(), ensure_ascii=False)
  else:
  raise ValueError("extract_config is required")

@@ -105,5 +102,4 @@ class Extract(BaseAPI):
  return self._parse_response(response, ParseResponse)


-
  __all__ = ["Extract"]
@@ -24,11 +24,11 @@ Example:

  from __future__ import annotations

- from typing import TYPE_CHECKING, Callable, Literal
+ from typing import TYPE_CHECKING, Any, Callable, Literal

  from .._base import BaseAPI
  from ..models.local import FailedFile, WorkflowResult
- from ..models.pipeline import PipelineStage
+ from ..models.pipeline import PipelineConfig, PipelineStage

  if TYPE_CHECKING:
  pass
@@ -53,8 +53,9 @@ class Local(BaseAPI):
  self,
  source, # Union[LocalSource, S3Source, FtpSource, SmbSource]
  destination, # Union[LocalDestination, S3Destination, MilvusDestination, QdrantDestination]
- stages: list[PipelineStage],
+ stages: list[PipelineStage | dict[str, Any]],
  *,
+ pipeline_config: PipelineConfig | None = None,
  progress_callback: Callable[[int, int, str], None] | None = None,
  on_error: Literal["stop", "continue", "retry"] = "stop",
  max_retries: int = 3,
@@ -75,6 +76,7 @@
  source: 数据源(本地 Connector 对象)
  destination: 输出目的地(本地 Connector 对象)
  stages: 处理阶段列表
+ pipeline_config: Pipeline 配置(包含中间结果保存配置)
  progress_callback: 进度回调函数 (current, total, message) -> None
  on_error: 错误处理策略 ("stop"|"continue"|"retry")
  max_retries: 最大重试次数(on_error="retry" 时生效)
@@ -125,20 +127,56 @@

  # 调用 Pipeline API
  from .pipeline import PipelineAPI
+
  pipeline_api = PipelineAPI(self._config, self._http)
  result = pipeline_api.execute(
  file=file_bytes,
  filename=filename,
  stages=stages,
+ config=pipeline_config,
  data_source=data_source,
  )

+ # 处理中间结果
+ if (
+ pipeline_config
+ and pipeline_config.include_intermediate_results
+ and pipeline_config.intermediate_results_destination
+ and hasattr(result, "intermediate_results")
+ and result.intermediate_results
+ ):
+ for stage_result in result.intermediate_results:
+ stage_name = stage_result.get("stage")
+ elements = stage_result.get("elements", [])
+ if stage_name and elements:
+ # 转换 elements 为字典列表
+ elements_data = []
+ for elem in elements:
+ if hasattr(elem, "model_dump"):
+ elements_data.append(elem.model_dump())
+ elif isinstance(elem, dict):
+ elements_data.append(elem)
+ else:
+ elements_data.append(elem)
+
+ metadata_with_stage = {
+ "filename": filename,
+ "file_path": file_path,
+ "stage": stage_name,
+ }
+ if result.stats and hasattr(result.stats, "record_id"):
+ metadata_with_stage["record_id"] = result.stats.record_id
+
+ pipeline_config.intermediate_results_destination.write(
+ elements_data, metadata_with_stage
+ )
+
  # 准备写入数据
  # 如果是向量数据库,写入 embeddings
- if hasattr(result, 'elements') and result.elements:
+ if hasattr(result, "elements") and result.elements:
  elements_data = []
  for elem in result.elements:
- elem_dict = elem.model_dump() if hasattr(elem, 'model_dump') else elem
+ elem_dict = elem.model_dump() if hasattr(elem, "model_dump") else elem
  elements_data.append(elem_dict)
  else:
  elements_data = []
@@ -148,8 +186,8 @@
  "filename": filename,
  "file_path": file_path,
  }
- if hasattr(result, 'record_id'):
- metadata["record_id"] = result.record_id
+ if result.stats and hasattr(result.stats, "record_id"):
+ metadata["record_id"] = result.stats.record_id

  destination.write(elements_data, metadata)

@@ -164,11 +202,13 @@
  else:
  # continue: 记录失败并继续下一个文件
  failed += 1
- failed_files.append(FailedFile(
- file_path=file_path,
- error=error_msg,
- retry_count=0, # HTTP客户端内部已经处理了重试
- ))
+ failed_files.append(
+ FailedFile(
+ file_path=file_path,
+ error=error_msg,
+ retry_count=0, # HTTP客户端内部已经处理了重试
+ )
+ )

  # 计算总耗时
  duration = time.time() - start_time
@@ -120,7 +120,9 @@ class PipelineAPI(BaseAPI):
  }

  if config:
- data["config"] = json.dumps(config.model_dump(), ensure_ascii=False)
+ # 只序列化 API 需要的字段(排除 intermediate_results_destination)
+ config_dict = config.model_dump(exclude={"intermediate_results_destination"})
+ data["config"] = json.dumps(config_dict, ensure_ascii=False)
  if data_source:
  data["data_source"] = json.dumps(data_source, ensure_ascii=False)

@@ -90,7 +90,7 @@ class QdrantDestination(Destination, VectorDestinationMixin):
  self.dimension = dimension
  self._PointStruct = PointStruct

- client_kwargs = {"url": url}
+ client_kwargs: dict[str, Any] = {"url": url}
  if api_key:
  client_kwargs["api_key"] = api_key
  if prefer_grpc:
@@ -90,7 +90,7 @@ class FtpSource(Source):
  SourceError: 列出文件失败
  """
  try:
- files = []
+ files: list[str] = []
  current_dir = self.client.pwd()

  if self.recursive:
@@ -178,7 +178,7 @@ class FtpSource(Source):

  # 回退到 LIST 命令
  try:
- lines = []
+ lines: list[str] = []
  self.client.retrlines("LIST", lines.append)
  for line in lines:
  parts = line.split()
@@ -125,7 +125,7 @@ class SmbSource(Source):
  SourceError: 列出文件失败
  """
  try:
- files = []
+ files: list[str] = []
  base_path = "/" if not self.path else f"/{self.path}"

  self._list_recursive(base_path, base_path, files)
@@ -11,7 +11,7 @@ class ExtractConfig(BaseModel):
  """抽取配置

  Attributes:
- schema: JSON Schema 定义抽取的结构
+ schema_: JSON Schema 定义抽取的结构(序列化为 schema)
  generate_citations: 是否生成引用
  stamp: 是否添加时间戳

@@ -23,11 +23,12 @@ class ExtractConfig(BaseModel):
  ... "total_amount": {"type": "number"}
  ... }
  ... }
- >>> config = ExtractConfig(schema=schema)
+ >>> config = ExtractConfig(schema_=schema)
  """

- schema: dict[str, Any] = Field(
+ schema_: dict[str, Any] = Field(
  default_factory=dict,
+ alias="schema",
  description="JSON Schema 定义抽取的结构"
  )
  generate_citations: bool = False
@@ -35,6 +36,7 @@ class ExtractConfig(BaseModel):

  model_config = {
  "extra": "allow",
+ "populate_by_name": True, # 允许使用 schema_ 和 schema 两种名称
  }

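With the alias and `populate_by_name` added above, the field can be populated under either name and round-trips back to the wire key `schema` when dumped with `by_alias=True`, matching the updated test in `test_pipeline_stages.py`. A small sketch, assuming Pydantic v2 semantics:

```python
from xparse_client.models.extract import ExtractConfig

schema = {
    "type": "object",
    "properties": {"title": {"type": "string"}},
    "required": ["title"],
}

# populate_by_name=True accepts both the alias "schema" and the field name "schema_".
by_alias = ExtractConfig(schema=schema)
by_name = ExtractConfig(schema_=schema)

# by_alias=True serializes the field back under the JSON key "schema".
assert by_alias.model_dump(by_alias=True)["schema"] == schema
assert by_name.model_dump(by_alias=True)["schema"] == schema

# A plain model_dump() would instead use the Python field name "schema_".
assert "schema_" in by_name.model_dump()
```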
@@ -63,17 +63,13 @@ class ParseResponse(BaseModel):

  Attributes:
  elements: 解析后的元素列表(Parse/Chunk/Embed 返回)
- extract_result: 提取结果(Extract 返回)
+ result: 提取结果(Extract 返回)
  success_count: 成功数量
- consume_time: 耗时
- record_id: 记录 ID
  """

  elements: list[Element] = Field(default_factory=list)
- extract_result: dict[str, Any] | None = None # Extract API 返回
+ result: dict[str, Any] | None = None # Extract API 返回
  success_count: int | None = None
- consume_time: str | None = None
- record_id: str | None = None

  model_config = {"extra": "allow"}

@@ -99,11 +99,13 @@ class PipelineConfig(BaseModel):

  Attributes:
  include_intermediate_results: 是否包含中间结果
+ intermediate_results_destination: 中间结果保存目的地(仅支持 LocalDestination)
  """

  include_intermediate_results: bool = False
+ intermediate_results_destination: Any = None # LocalDestination 对象

- model_config = {"extra": "allow"}
+ model_config = {"extra": "allow", "arbitrary_types_allowed": True}


  class PipelineResponse(BaseModel):
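A hedged sketch of how the new fields combine with the serialization change in `api/pipeline.py` above: the destination object stays client-side and is excluded from the payload sent to the API. The `LocalDestination(output_dir=...)` constructor argument is an assumption based on the example config, not something this diff confirms:

```python
from xparse_client.connectors.destinations.local import LocalDestination  # assumed import path
from xparse_client.models.pipeline import PipelineConfig

config = PipelineConfig(
    include_intermediate_results=True,
    # arbitrary_types_allowed lets the model hold a connector object;
    # output_dir mirrors the example config and is an assumed kwarg.
    intermediate_results_destination=LocalDestination(output_dir="./intermediate"),
)

# Only API-facing fields go over the wire; the local connector is excluded,
# exactly as PipelineAPI.execute() does before json.dumps().
payload = config.model_dump(exclude={"intermediate_results_destination"})
assert "intermediate_results_destination" not in payload
assert payload["include_intermediate_results"] is True
```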
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: xparse-client
- Version: 0.3.0b1
+ Version: 0.3.0b6
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
  Author-email: INTSIG-TEXTIN <support@textin.com>
  License-Expression: MIT
@@ -213,17 +213,17 @@ schema = {
  "author": {"type": "string", "description": "作者"},
  "date": {"type": "string", "description": "日期"}
  },
- "required": ["title"]
+ "required": ["title", "author", "date"]
  }

  with open("document.pdf", "rb") as f:
  result = client.extract.extract(
  file=f,
  filename="document.pdf",
- config=ExtractConfig(schema=schema)
+ extract_config=ExtractConfig(schema=schema)
  )

- print(result.extracted_data)
+ print(result.result)
  ```

  ### 3. 本地批处理