xparse-client 0.3.0b4__py3-none-any.whl → 0.3.0b6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
example/2_async_job.py CHANGED
@@ -165,7 +165,7 @@ def example_3_error_handling():
165
165
 
166
166
  except Exception as e:
167
167
  print(f"\n⏸️ 捕获到超时异常: {type(e).__name__}")
168
- print(f" 这是正常的,演示了如何处理超时情况")
168
+ print(" 这是正常的,演示了如何处理超时情况")
169
169
 
170
170
  finally:
171
171
  test_file.unlink(missing_ok=True)
@@ -91,12 +91,12 @@ def example_1_config_file():
91
91
  XParseClient.from_env()
92
92
 
93
93
  # 演示如何从配置构建 source 和 destination(不实际创建)
94
- print(f"\n✅ 配置加载成功!")
95
- print(f" - Source 配置:")
94
+ print("\n✅ 配置加载成功!")
95
+ print(" - Source 配置:")
96
96
  print(f" 目录: {loaded_config['source']['directory']}")
97
97
  print(f" 模式: {loaded_config['source']['pattern']}")
98
98
  print(f" 递归: {loaded_config['source']['recursive']}")
99
- print(f" - Destination 配置:")
99
+ print(" - Destination 配置:")
100
100
  print(f" 输出目录: {loaded_config['destination']['output_dir']}")
101
101
 
102
102
  # 构建 stages
@@ -89,8 +89,8 @@ def test_extract_stage_with_config():
89
89
  assert stage.type == "extract"
90
90
  assert stage.config.generate_citations is True
91
91
 
92
- # 序列化检查
93
- dumped = stage.model_dump()
92
+ # 序列化检查(使用 by_alias=True 来确保 schema_ 序列化为 schema)
93
+ dumped = stage.model_dump(by_alias=True)
94
94
  assert "schema" in dumped["config"]
95
95
  assert dumped["config"]["schema"] == schema
96
96
 
xparse_client/_base.py CHANGED
@@ -6,7 +6,7 @@
6
6
  from __future__ import annotations
7
7
 
8
8
  from abc import ABC
9
- from typing import TYPE_CHECKING, Any, TypeVar
9
+ from typing import TYPE_CHECKING, Any, Protocol, TypeVar, cast
10
10
 
11
11
  import httpx
12
12
 
@@ -14,7 +14,16 @@ if TYPE_CHECKING:
14
14
  from ._config import SDKConfiguration
15
15
  from ._http import HTTPClient
16
16
 
17
- T = TypeVar("T")
17
+
18
+ class PydanticModel(Protocol):
19
+ """Pydantic 模型协议"""
20
+
21
+ @classmethod
22
+ def model_validate(cls, obj: Any) -> Any:
23
+ ...
24
+
25
+
26
+ T = TypeVar("T", bound=PydanticModel)
18
27
 
19
28
 
20
29
  class BaseAPI(ABC): # noqa: B024
@@ -130,7 +139,7 @@ class BaseAPI(ABC): # noqa: B024
130
139
  if isinstance(data, dict) and "data" in data:
131
140
  data = data["data"]
132
141
 
133
- return model_class.model_validate(data)
142
+ return cast(T, model_class.model_validate(data))
134
143
 
135
144
  def _parse_list_response(
136
145
  self,
@@ -156,7 +165,7 @@ class BaseAPI(ABC): # noqa: B024
156
165
  if not isinstance(data, list):
157
166
  data = [data]
158
167
 
159
- return [model_class.model_validate(item) for item in data]
168
+ return [cast(T, model_class.model_validate(item)) for item in data]
160
169
 
161
170
  def _parse_raw_response(self, response: httpx.Response) -> dict[str, Any]:
162
171
  """解析原始 JSON 响应
@@ -171,9 +180,9 @@ class BaseAPI(ABC): # noqa: B024
171
180
 
172
181
  # 处理标准响应格式
173
182
  if isinstance(data, dict) and "data" in data:
174
- return data["data"]
183
+ return cast(dict[str, Any], data["data"])
175
184
 
176
- return data
185
+ return cast(dict[str, Any], data)
177
186
 
178
187
 
179
188
  __all__ = ["BaseAPI"]
xparse_client/_http.py CHANGED
@@ -53,7 +53,8 @@ def _extract_error_message(response: httpx.Response) -> str:
53
53
  return str(data[key])
54
54
  # 如果有嵌套的 error 对象
55
55
  if "error" in data and isinstance(data["error"], dict):
56
- return data["error"].get("message", str(data["error"]))
56
+ error_dict = data["error"]
57
+ return str(error_dict.get("message", error_dict))
57
58
  return str(data)
58
59
  except Exception:
59
60
  # 非 JSON 响应,返回文本内容
@@ -106,7 +107,7 @@ def raise_for_status(response: httpx.Response) -> None:
106
107
  message = _extract_error_message(response)
107
108
  response_body = response.text[:1000] if response.text else None
108
109
 
109
- common_kwargs = {
110
+ common_kwargs: dict[str, Any] = {
110
111
  "status_code": status_code,
111
112
  "request_id": request_id,
112
113
  "response_body": response_body,
@@ -140,24 +141,24 @@ def raise_for_status(response: httpx.Response) -> None:
140
141
  message = data.get("message", f"业务错误 code: {code}")
141
142
  response_body = response.text[:1000] if response.text else None
142
143
 
143
- common_kwargs = {
144
+ error_kwargs: dict[str, Any] = {
144
145
  "request_id": request_id,
145
146
  "response_body": response_body,
146
147
  }
147
148
 
148
149
  # 根据业务 code 映射到相应异常
149
150
  if code == 400:
150
- raise ValidationError(message, details=common_kwargs)
151
+ raise ValidationError(message, details=error_kwargs)
151
152
  elif code == 401:
152
- raise AuthenticationError(message, **common_kwargs)
153
+ raise AuthenticationError(message, **error_kwargs)
153
154
  elif code == 403:
154
- raise PermissionDeniedError(message, **common_kwargs)
155
+ raise PermissionDeniedError(message, **error_kwargs)
155
156
  elif code == 404:
156
- raise NotFoundError(message, **common_kwargs)
157
+ raise NotFoundError(message, **error_kwargs)
157
158
  elif code >= 500:
158
- raise ServerError(message, **common_kwargs)
159
+ raise ServerError(message, **error_kwargs)
159
160
  else:
160
- raise APIError(message, **common_kwargs)
161
+ raise APIError(message, **error_kwargs)
161
162
  except (ValueError, KeyError):
162
163
  # 如果无法解析 JSON 或没有 code 字段,认为是成功
163
164
  pass
@@ -79,7 +79,8 @@ class Extract(BaseAPI):
79
79
  ... "properties": {
80
80
  ... "invoice_number": {"type": "string"},
81
81
  ... "total_amount": {"type": "number"}
82
- ... }
82
+ ... },
83
+ ... "required": ["invoice_number", "total_amount"]
83
84
  ... },
84
85
  ... generate_citations=True
85
86
  ... )
@@ -89,15 +90,11 @@ class Extract(BaseAPI):
89
90
  data = {}
90
91
 
91
92
  if parse_config:
92
- data["parse_config"] = json.dumps(
93
- parse_config.model_dump(), ensure_ascii=False
94
- )
93
+ data["parse_config"] = json.dumps(parse_config.model_dump(), ensure_ascii=False)
95
94
 
96
95
  # 处理 extract_config
97
96
  if extract_config:
98
- data["extract_config"] = json.dumps(
99
- extract_config.model_dump(), ensure_ascii=False
100
- )
97
+ data["extract_config"] = json.dumps(extract_config.model_dump(), ensure_ascii=False)
101
98
  else:
102
99
  raise ValueError("extract_config is required")
103
100
 
@@ -105,5 +102,4 @@ class Extract(BaseAPI):
105
102
  return self._parse_response(response, ParseResponse)
106
103
 
107
104
 
108
-
109
105
  __all__ = ["Extract"]
@@ -24,7 +24,7 @@ Example:
24
24
 
25
25
  from __future__ import annotations
26
26
 
27
- from typing import TYPE_CHECKING, Callable, Literal
27
+ from typing import TYPE_CHECKING, Any, Callable, Literal
28
28
 
29
29
  from .._base import BaseAPI
30
30
  from ..models.local import FailedFile, WorkflowResult
@@ -53,7 +53,7 @@ class Local(BaseAPI):
53
53
  self,
54
54
  source, # Union[LocalSource, S3Source, FtpSource, SmbSource]
55
55
  destination, # Union[LocalDestination, S3Destination, MilvusDestination, QdrantDestination]
56
- stages: list[PipelineStage],
56
+ stages: list[PipelineStage | dict[str, Any]],
57
57
  *,
58
58
  pipeline_config: PipelineConfig | None = None,
59
59
  progress_callback: Callable[[int, int, str], None] | None = None,
@@ -90,7 +90,7 @@ class QdrantDestination(Destination, VectorDestinationMixin):
90
90
  self.dimension = dimension
91
91
  self._PointStruct = PointStruct
92
92
 
93
- client_kwargs = {"url": url}
93
+ client_kwargs: dict[str, Any] = {"url": url}
94
94
  if api_key:
95
95
  client_kwargs["api_key"] = api_key
96
96
  if prefer_grpc:
@@ -90,7 +90,7 @@ class FtpSource(Source):
90
90
  SourceError: 列出文件失败
91
91
  """
92
92
  try:
93
- files = []
93
+ files: list[str] = []
94
94
  current_dir = self.client.pwd()
95
95
 
96
96
  if self.recursive:
@@ -178,7 +178,7 @@ class FtpSource(Source):
178
178
 
179
179
  # 回退到 LIST 命令
180
180
  try:
181
- lines = []
181
+ lines: list[str] = []
182
182
  self.client.retrlines("LIST", lines.append)
183
183
  for line in lines:
184
184
  parts = line.split()
@@ -125,7 +125,7 @@ class SmbSource(Source):
125
125
  SourceError: 列出文件失败
126
126
  """
127
127
  try:
128
- files = []
128
+ files: list[str] = []
129
129
  base_path = "/" if not self.path else f"/{self.path}"
130
130
 
131
131
  self._list_recursive(base_path, base_path, files)
@@ -11,7 +11,7 @@ class ExtractConfig(BaseModel):
11
11
  """抽取配置
12
12
 
13
13
  Attributes:
14
- schema: JSON Schema 定义抽取的结构
14
+ schema_: JSON Schema 定义抽取的结构(序列化为 schema)
15
15
  generate_citations: 是否生成引用
16
16
  stamp: 是否添加时间戳
17
17
 
@@ -23,11 +23,12 @@ class ExtractConfig(BaseModel):
23
23
  ... "total_amount": {"type": "number"}
24
24
  ... }
25
25
  ... }
26
- >>> config = ExtractConfig(schema=schema)
26
+ >>> config = ExtractConfig(schema_=schema)
27
27
  """
28
28
 
29
- schema: dict[str, Any] = Field(
29
+ schema_: dict[str, Any] = Field(
30
30
  default_factory=dict,
31
+ alias="schema",
31
32
  description="JSON Schema 定义抽取的结构"
32
33
  )
33
34
  generate_citations: bool = False
@@ -35,6 +36,7 @@ class ExtractConfig(BaseModel):
35
36
 
36
37
  model_config = {
37
38
  "extra": "allow",
39
+ "populate_by_name": True, # 允许使用 schema_ 和 schema 两种名称
38
40
  }
39
41
 
40
42
 
@@ -63,17 +63,13 @@ class ParseResponse(BaseModel):
63
63
 
64
64
  Attributes:
65
65
  elements: 解析后的元素列表(Parse/Chunk/Embed 返回)
66
- extract_result: 提取结果(Extract 返回)
66
+ result: 提取结果(Extract 返回)
67
67
  success_count: 成功数量
68
- consume_time: 耗时
69
- record_id: 记录 ID
70
68
  """
71
69
 
72
70
  elements: list[Element] = Field(default_factory=list)
73
- extract_result: dict[str, Any] | None = None # Extract API 返回
71
+ result: dict[str, Any] | None = None # Extract API 返回
74
72
  success_count: int | None = None
75
- consume_time: str | None = None
76
- record_id: str | None = None
77
73
 
78
74
  model_config = {"extra": "allow"}
79
75
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b4
3
+ Version: 0.3.0b6
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -213,17 +213,17 @@ schema = {
213
213
  "author": {"type": "string", "description": "作者"},
214
214
  "date": {"type": "string", "description": "日期"}
215
215
  },
216
- "required": ["title"]
216
+ "required": ["title", "author", "date"]
217
217
  }
218
218
 
219
219
  with open("document.pdf", "rb") as f:
220
220
  result = client.extract.extract(
221
221
  file=f,
222
222
  filename="document.pdf",
223
- config=ExtractConfig(schema=schema)
223
+ extract_config=ExtractConfig(schema=schema)
224
224
  )
225
225
 
226
- print(result.extracted_data)
226
+ print(result.result)
227
227
  ```
228
228
 
229
229
  ### 3. 本地批处理
@@ -1,7 +1,7 @@
1
1
  example/1_basic_api_usage.py,sha256=x_ZHEWXz6z7qp-sMLBvq7Vpu6nA7YxVCizyQpDday6M,5802
2
- example/2_async_job.py,sha256=MCjWii3ksmONiIyCWviBrKPZIwrlCPDvmUPP_F8qXPM,6353
2
+ example/2_async_job.py,sha256=CT03sKz2dwTJRqSde8s5o75oe-p3ze43i0E1i_-Ai5Q,6352
3
3
  example/3_local_workflow.py,sha256=ZxoT8N0Nz4_s-bkLF0l6UDqBJk_eZZi99WUtUhJseXs,8980
4
- example/4_advanced_workflow.py,sha256=cgG0xke8-WNPxqx7iSTl9qgoJsVctXf5mjDkp-SaQho,10159
4
+ example/4_advanced_workflow.py,sha256=KH6gmzoKi6oeHIPDv9snw1YehynHTDqWVq1WFtoqGws,10156
5
5
  example/README.md,sha256=d-DumaZwbV96K6ZTEryAlDSk9HUhRBg0gG_Yekx9PqA,2336
6
6
  example/config_example.json,sha256=mBaLmKbgxFBKUgA-FtCTiLMtq24z0QRwKbPeYDGwA9I,2517
7
7
  tests/conftest.py,sha256=h6sy9RNQmXTQNTu86h7ATDAg-9wI4cKjzKXUbNNH5q8,10037
@@ -25,17 +25,17 @@ tests/unit/connectors/test_s3.py,sha256=Xbo-S9khgS1S3LM3BUIuYp-28YLEmEW0SP1O-iJ8
25
25
  tests/unit/connectors/test_smb.py,sha256=yrI_xNeOe84vONyxXNOGLbWa7tcyxjzo9G5Z_Tjzn-w,12378
26
26
  tests/unit/connectors/test_utils.py,sha256=RfnzTW5-9w2f5YeLbNTB_z3vVnsUSL7DwXNobo7xZUA,10302
27
27
  tests/unit/models/test_local.py,sha256=QnxzWKOivqeITd0CPzZLEzGbaMDtiwsPiwyEHiPpcPY,1325
28
- tests/unit/models/test_pipeline_stages.py,sha256=vT7Pd0lRWdChTQJuCZ3iUBx8C0xFXQXTBVB6jtWtEKU,3972
28
+ tests/unit/models/test_pipeline_stages.py,sha256=LR7OmMhVAAsV085x-l3d5jQ-efl04WMyrQFjVzXEvws,4049
29
29
  tests/unit/models/test_workflows.py,sha256=Al4d3SQI1jTz585i1kD569TZx4IWNrhyf3_Le6-cq2k,1692
30
30
  xparse_client/__init__.py,sha256=FyR7W0eteapDoPj2pU4VoTobM8XR2B7f_g4yVI-v2v8,3956
31
- xparse_client/_base.py,sha256=Vx85kDns565zwv6Zl6_8jl6b43nNprfBHbqdCVPGGTc,4420
31
+ xparse_client/_base.py,sha256=IPYdHREM0CUvZ_kWUp4olYQfsfKzPTQoZuIrNSanKq4,4661
32
32
  xparse_client/_client.py,sha256=ZGxZ9xhUitTP-eRhte57ekqUkrXxNubFDL5gojajA2s,6964
33
33
  xparse_client/_config.py,sha256=BG3cEW5ywKhmtfHjfTFrdezs2Ugc9P2Y85tzu5OZZ4M,6480
34
- xparse_client/_http.py,sha256=OrnrkTeuG5tEpuD-a1ixLIFPdiqnVIYtuZCi3uXAzJ4,11381
34
+ xparse_client/_http.py,sha256=Y0-x8CZbHVbi4Jmln1d60oD_Pz1nwQAf-0sbON4B9qo,11443
35
35
  xparse_client/exceptions.py,sha256=FNozgCFaQSsdeyxE6P-3CZItkTSfFJcIQVrddHWKN0s,9521
36
36
  xparse_client/api/__init__.py,sha256=MZ3wBTDU666efOBv81MKKtc1UkjUsjzIEOpsnDUAHD4,203
37
- xparse_client/api/extract.py,sha256=87tH-y4sXPKW35l-d0GmZdK9RIXWq4tfmGZXYJhIN60,3011
38
- xparse_client/api/local.py,sha256=yAEYj9a_pGra08_PTr_FaJrdUz7Pyh7HXq0Rb_6bvH0,8192
37
+ xparse_client/api/extract.py,sha256=9wHPtRg993tKgxnwYnBPyXO-PwYA_mK11YkBcW7hBiw,3026
38
+ xparse_client/api/local.py,sha256=dARW6RjiNsxliWxs7cB6qQl9eTZNvyjBpOG8qXyaIaw,8214
39
39
  xparse_client/api/parse.py,sha256=nvpu5kiN43QlyMW3N6vMruVADu6MdjS-TMbLFjBh-cc,5870
40
40
  xparse_client/api/pipeline.py,sha256=bPCSpWjER-7c7L0iMQ_Xz1VTe5DARMsX5XIcrayGKzw,4530
41
41
  xparse_client/api/workflows.py,sha256=29_Rf4bIH1fiJb_bWShkUm-et97ym5ymQFh5i3b5mDI,5945
@@ -45,24 +45,24 @@ xparse_client/connectors/destinations/__init__.py,sha256=hmvWAXj58yLmePMCEnqKbNh
45
45
  xparse_client/connectors/destinations/base.py,sha256=DfwwH5S8VPwEUyvWqUKDV9Bq-7A59lqoZHruOjB7L50,3218
46
46
  xparse_client/connectors/destinations/local.py,sha256=drZ5wkPMiYd8iOROmLJR3eoCXcYKUpAPPjSFpafeNW8,2542
47
47
  xparse_client/connectors/destinations/milvus.py,sha256=MhElUsIMY1g6_c_bD-wjpWOM5PiuZ9bLsgXACW7EY1c,7339
48
- xparse_client/connectors/destinations/qdrant.py,sha256=Mo1wCwtsM7OIMLZ0HsEBnvE8N9BGnyTGBGJ1B8oYIew,7661
48
+ xparse_client/connectors/destinations/qdrant.py,sha256=iU9NxZBhqp-94-1TkG32fDOjg8i4bT9Ox0F2E-2KODg,7677
49
49
  xparse_client/connectors/destinations/s3.py,sha256=2aeTW0cNUVPLBL8up9InFfgXBcFDPtqFNGa5FbUphhk,4728
50
50
  xparse_client/connectors/sources/__init__.py,sha256=830EDkdGJtPC2p1MhIP108uLibGeMoXcXAwPAEACPvQ,1143
51
51
  xparse_client/connectors/sources/base.py,sha256=fbRWFWuIcw_XWrOPpPkBnQ0Pf2R9N7Ry4523rxWcOpc,1863
52
- xparse_client/connectors/sources/ftp.py,sha256=bjOjnd-2l0MZ2EAcGM3jvuTP7FMWBiQaEbgHzhIl3og,8170
52
+ xparse_client/connectors/sources/ftp.py,sha256=RwH8eeY-raD4BlF51kb_0BJuM3Vxjbnjpob-f1x_VgY,8192
53
53
  xparse_client/connectors/sources/local.py,sha256=ZYTGWdQmUpzdenn3JQ_p0u9-8x7xXk5Ani2GIAqTGyE,5156
54
54
  xparse_client/connectors/sources/s3.py,sha256=RCzinmz9cCqp9cFcfEozBapa2nllFvJCw9P-aLeOJQA,7219
55
- xparse_client/connectors/sources/smb.py,sha256=_F496mipGZJ0y17H-sYTlqAf2uJScxGyit7kirTyDKc,7786
55
+ xparse_client/connectors/sources/smb.py,sha256=9oWrgatdmsXMrFnl-du-lau3RMxlMPw2aQy7SFknUDM,7797
56
56
  xparse_client/models/__init__.py,sha256=zLBFirgXDisEeqKHqpsH1bN5p2zE7ZmCtVENZ69mlOg,1213
57
57
  xparse_client/models/chunk.py,sha256=VXKrATgk7bhlPVbJTFOUvOWu9oEnNMgMF6AnQarJD7o,986
58
58
  xparse_client/models/embed.py,sha256=Tg9iqOiLZy63m4T0q2z9cq3neuQbMnqDqT55-sVA7CE,1652
59
- xparse_client/models/extract.py,sha256=ZwMy0MsTt775Bq3UsZ9WlXFNCLrSnN0j1RgXQ-vFNqo,915
59
+ xparse_client/models/extract.py,sha256=OJigourjtIght32lt2ayULiRDlviCNwkphvFDhlasiM,1049
60
60
  xparse_client/models/local.py,sha256=hqKmyWTU_EhPk6qybtBzmbLybBBaiasTpqM4_zP-ipo,698
61
- xparse_client/models/parse.py,sha256=K34Fodo3emOPZJJQeu3MLjvA7HkVMxAuXk20UvO4lfQ,3297
61
+ xparse_client/models/parse.py,sha256=COgxroHkJce_S7d2HGd_1zBYhkpCutGtmjyLnpI2_eI,3154
62
62
  xparse_client/models/pipeline.py,sha256=24bDzhrVotQ8St6VLEJLvG2cZF0G2AMioKI34I_hJXI,3297
63
63
  xparse_client/models/workflows.py,sha256=BivMdGOAmhP6oYLQSGAAN7yml2xb7vHHrpzwLgN_Afk,1754
64
- xparse_client-0.3.0b4.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
65
- xparse_client-0.3.0b4.dist-info/METADATA,sha256=f8yFWh3712-v_y5oDQDbF5NpXCZOeCOGQU_AFqZEYYk,25453
66
- xparse_client-0.3.0b4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
- xparse_client-0.3.0b4.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
68
- xparse_client-0.3.0b4.dist-info/RECORD,,
64
+ xparse_client-0.3.0b6.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
65
+ xparse_client-0.3.0b6.dist-info/METADATA,sha256=4qsRqND7Hj1l560SQ0k8wi3YwbKeGeAxnEN6D5D-3ZM,25471
66
+ xparse_client-0.3.0b6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
+ xparse_client-0.3.0b6.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
68
+ xparse_client-0.3.0b6.dist-info/RECORD,,