xparse-client 0.3.0b2__py3-none-any.whl → 0.3.0b4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ from typing import TYPE_CHECKING, Callable, Literal
28
28
 
29
29
  from .._base import BaseAPI
30
30
  from ..models.local import FailedFile, WorkflowResult
31
- from ..models.pipeline import PipelineStage
31
+ from ..models.pipeline import PipelineConfig, PipelineStage
32
32
 
33
33
  if TYPE_CHECKING:
34
34
  pass
@@ -55,6 +55,7 @@ class Local(BaseAPI):
55
55
  destination, # Union[LocalDestination, S3Destination, MilvusDestination, QdrantDestination]
56
56
  stages: list[PipelineStage],
57
57
  *,
58
+ pipeline_config: PipelineConfig | None = None,
58
59
  progress_callback: Callable[[int, int, str], None] | None = None,
59
60
  on_error: Literal["stop", "continue", "retry"] = "stop",
60
61
  max_retries: int = 3,
@@ -75,6 +76,7 @@ class Local(BaseAPI):
75
76
  source: 数据源(本地 Connector 对象)
76
77
  destination: 输出目的地(本地 Connector 对象)
77
78
  stages: 处理阶段列表
79
+ pipeline_config: Pipeline 配置(包含中间结果保存配置)
78
80
  progress_callback: 进度回调函数 (current, total, message) -> None
79
81
  on_error: 错误处理策略 ("stop"|"continue"|"retry")
80
82
  max_retries: 最大重试次数(on_error="retry" 时生效)
@@ -131,9 +133,44 @@ class Local(BaseAPI):
131
133
  file=file_bytes,
132
134
  filename=filename,
133
135
  stages=stages,
136
+ config=pipeline_config,
134
137
  data_source=data_source,
135
138
  )
136
139
 
140
+ # 处理中间结果
141
+ if (
142
+ pipeline_config
143
+ and pipeline_config.include_intermediate_results
144
+ and pipeline_config.intermediate_results_destination
145
+ and hasattr(result, "intermediate_results")
146
+ and result.intermediate_results
147
+ ):
148
+ for stage_result in result.intermediate_results:
149
+ stage_name = stage_result.get("stage")
150
+ elements = stage_result.get("elements", [])
151
+ if stage_name and elements:
152
+ # 转换 elements 为字典列表
153
+ elements_data = []
154
+ for elem in elements:
155
+ if hasattr(elem, "model_dump"):
156
+ elements_data.append(elem.model_dump())
157
+ elif isinstance(elem, dict):
158
+ elements_data.append(elem)
159
+ else:
160
+ elements_data.append(elem)
161
+
162
+ metadata_with_stage = {
163
+ "filename": filename,
164
+ "file_path": file_path,
165
+ "stage": stage_name,
166
+ }
167
+ if result.stats and hasattr(result.stats, "record_id"):
168
+ metadata_with_stage["record_id"] = result.stats.record_id
169
+
170
+ pipeline_config.intermediate_results_destination.write(
171
+ elements_data, metadata_with_stage
172
+ )
173
+
137
174
  # 准备写入数据
138
175
  # 如果是向量数据库,写入 embeddings
139
176
  if hasattr(result, "elements") and result.elements:
@@ -120,7 +120,9 @@ class PipelineAPI(BaseAPI):
120
120
  }
121
121
 
122
122
  if config:
123
- data["config"] = json.dumps(config.model_dump(), ensure_ascii=False)
123
+ # 只序列化 API 需要的字段(排除 intermediate_results_destination)
124
+ config_dict = config.model_dump(exclude={"intermediate_results_destination"})
125
+ data["config"] = json.dumps(config_dict, ensure_ascii=False)
124
126
  if data_source:
125
127
  data["data_source"] = json.dumps(data_source, ensure_ascii=False)
126
128
 
@@ -99,11 +99,13 @@ class PipelineConfig(BaseModel):
99
99
 
100
100
  Attributes:
101
101
  include_intermediate_results: 是否包含中间结果
102
+ intermediate_results_destination: 中间结果保存目的地(仅支持 LocalDestination)
102
103
  """
103
104
 
104
105
  include_intermediate_results: bool = False
106
+ intermediate_results_destination: Any = None # LocalDestination 对象
105
107
 
106
- model_config = {"extra": "allow"}
108
+ model_config = {"extra": "allow", "arbitrary_types_allowed": True}
107
109
 
108
110
 
109
111
  class PipelineResponse(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b2
3
+ Version: 0.3.0b4
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -35,9 +35,9 @@ xparse_client/_http.py,sha256=OrnrkTeuG5tEpuD-a1ixLIFPdiqnVIYtuZCi3uXAzJ4,11381
35
35
  xparse_client/exceptions.py,sha256=FNozgCFaQSsdeyxE6P-3CZItkTSfFJcIQVrddHWKN0s,9521
36
36
  xparse_client/api/__init__.py,sha256=MZ3wBTDU666efOBv81MKKtc1UkjUsjzIEOpsnDUAHD4,203
37
37
  xparse_client/api/extract.py,sha256=87tH-y4sXPKW35l-d0GmZdK9RIXWq4tfmGZXYJhIN60,3011
38
- xparse_client/api/local.py,sha256=vFfPfdVPMYtJQK76iPfGw1DdFxNqfFiMIEPWw4jkrwo,6246
38
+ xparse_client/api/local.py,sha256=yAEYj9a_pGra08_PTr_FaJrdUz7Pyh7HXq0Rb_6bvH0,8192
39
39
  xparse_client/api/parse.py,sha256=nvpu5kiN43QlyMW3N6vMruVADu6MdjS-TMbLFjBh-cc,5870
40
- xparse_client/api/pipeline.py,sha256=HscuwRUOmcdUEjzqSyAMdNkcLyK_XbF_THjNH5DXsec,4356
40
+ xparse_client/api/pipeline.py,sha256=bPCSpWjER-7c7L0iMQ_Xz1VTe5DARMsX5XIcrayGKzw,4530
41
41
  xparse_client/api/workflows.py,sha256=29_Rf4bIH1fiJb_bWShkUm-et97ym5ymQFh5i3b5mDI,5945
42
42
  xparse_client/connectors/__init__.py,sha256=BLGxk9i8BsVCr4RMrCIh8b9cywBqXXmcDHqfF7C_FX0,1113
43
43
  xparse_client/connectors/_utils.py,sha256=Qb2OyUg8gvzyMBELx4h0GmjRSdT4d9C45v9m6NUNuMg,3515
@@ -59,10 +59,10 @@ xparse_client/models/embed.py,sha256=Tg9iqOiLZy63m4T0q2z9cq3neuQbMnqDqT55-sVA7CE
59
59
  xparse_client/models/extract.py,sha256=ZwMy0MsTt775Bq3UsZ9WlXFNCLrSnN0j1RgXQ-vFNqo,915
60
60
  xparse_client/models/local.py,sha256=hqKmyWTU_EhPk6qybtBzmbLybBBaiasTpqM4_zP-ipo,698
61
61
  xparse_client/models/parse.py,sha256=K34Fodo3emOPZJJQeu3MLjvA7HkVMxAuXk20UvO4lfQ,3297
62
- xparse_client/models/pipeline.py,sha256=KVmTYqyi6sbGMR9UQcEgMHFqhcSBPnoRgtGL4xp8QEA,3086
62
+ xparse_client/models/pipeline.py,sha256=24bDzhrVotQ8St6VLEJLvG2cZF0G2AMioKI34I_hJXI,3297
63
63
  xparse_client/models/workflows.py,sha256=BivMdGOAmhP6oYLQSGAAN7yml2xb7vHHrpzwLgN_Afk,1754
64
- xparse_client-0.3.0b2.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
65
- xparse_client-0.3.0b2.dist-info/METADATA,sha256=QYHCW6pdcMpJKkK3GRKFNMs9comTFatvSG1jXs_5Zv4,25453
66
- xparse_client-0.3.0b2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
- xparse_client-0.3.0b2.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
68
- xparse_client-0.3.0b2.dist-info/RECORD,,
64
+ xparse_client-0.3.0b4.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
65
+ xparse_client-0.3.0b4.dist-info/METADATA,sha256=f8yFWh3712-v_y5oDQDbF5NpXCZOeCOGQU_AFqZEYYk,25453
66
+ xparse_client-0.3.0b4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
+ xparse_client-0.3.0b4.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
68
+ xparse_client-0.3.0b4.dist-info/RECORD,,