xparse-client 0.3.0b1__tar.gz → 0.3.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {xparse_client-0.3.0b1/xparse_client.egg-info → xparse_client-0.3.0b3}/PKG-INFO +1 -1
  2. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/pyproject.toml +1 -1
  3. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/local.py +40 -10
  4. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/pipeline.py +3 -1
  5. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/pipeline.py +3 -1
  6. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3/xparse_client.egg-info}/PKG-INFO +1 -1
  7. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/CHANGELOG.md +0 -0
  8. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/LICENSE +0 -0
  9. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/MANIFEST.in +0 -0
  10. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/README.md +0 -0
  11. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/1_basic_api_usage.py +0 -0
  12. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/2_async_job.py +0 -0
  13. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/3_local_workflow.py +0 -0
  14. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/4_advanced_workflow.py +0 -0
  15. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/README.md +0 -0
  16. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/config_example.json +0 -0
  17. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/setup.cfg +0 -0
  18. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/conftest.py +0 -0
  19. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/__init__.py +0 -0
  20. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/__init__.py +0 -0
  21. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_extract.py +0 -0
  22. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_local.py +0 -0
  23. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_parse.py +0 -0
  24. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_pipeline.py +0 -0
  25. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_workflows.py +0 -0
  26. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_ftp.py +0 -0
  27. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_local_connectors.py +0 -0
  28. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_milvus.py +0 -0
  29. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_qdrant.py +0 -0
  30. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_s3.py +0 -0
  31. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_smb.py +0 -0
  32. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_utils.py +0 -0
  33. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/models/test_local.py +0 -0
  34. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/models/test_pipeline_stages.py +0 -0
  35. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/models/test_workflows.py +0 -0
  36. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_base.py +0 -0
  37. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_client.py +0 -0
  38. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_config.py +0 -0
  39. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_exceptions.py +0 -0
  40. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_http.py +0 -0
  41. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/__init__.py +0 -0
  42. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_base.py +0 -0
  43. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_client.py +0 -0
  44. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_config.py +0 -0
  45. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_http.py +0 -0
  46. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/__init__.py +0 -0
  47. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/extract.py +0 -0
  48. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/parse.py +0 -0
  49. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/workflows.py +0 -0
  50. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/__init__.py +0 -0
  51. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/_utils.py +0 -0
  52. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/__init__.py +0 -0
  53. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/base.py +0 -0
  54. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/local.py +0 -0
  55. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/milvus.py +0 -0
  56. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/qdrant.py +0 -0
  57. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/s3.py +0 -0
  58. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/__init__.py +0 -0
  59. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/base.py +0 -0
  60. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/ftp.py +0 -0
  61. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/local.py +0 -0
  62. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/s3.py +0 -0
  63. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/smb.py +0 -0
  64. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/exceptions.py +0 -0
  65. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/__init__.py +0 -0
  66. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/chunk.py +0 -0
  67. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/embed.py +0 -0
  68. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/extract.py +0 -0
  69. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/local.py +0 -0
  70. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/parse.py +0 -0
  71. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/workflows.py +0 -0
  72. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/SOURCES.txt +0 -0
  73. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/dependency_links.txt +0 -0
  74. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/requires.txt +0 -0
  75. {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b1
3
+ Version: 0.3.0b3
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xparse-client"
7
- version = "0.3.0b1"
7
+ version = "0.3.0b3"
8
8
  description = "面向 Agent 和 RAG 的文档处理 Pipeline 客户端"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -28,7 +28,7 @@ from typing import TYPE_CHECKING, Callable, Literal
28
28
 
29
29
  from .._base import BaseAPI
30
30
  from ..models.local import FailedFile, WorkflowResult
31
- from ..models.pipeline import PipelineStage
31
+ from ..models.pipeline import PipelineConfig, PipelineStage
32
32
 
33
33
  if TYPE_CHECKING:
34
34
  pass
@@ -55,6 +55,7 @@ class Local(BaseAPI):
55
55
  destination, # Union[LocalDestination, S3Destination, MilvusDestination, QdrantDestination]
56
56
  stages: list[PipelineStage],
57
57
  *,
58
+ pipeline_config: PipelineConfig | None = None,
58
59
  progress_callback: Callable[[int, int, str], None] | None = None,
59
60
  on_error: Literal["stop", "continue", "retry"] = "stop",
60
61
  max_retries: int = 3,
@@ -75,6 +76,7 @@ class Local(BaseAPI):
75
76
  source: 数据源(本地 Connector 对象)
76
77
  destination: 输出目的地(本地 Connector 对象)
77
78
  stages: 处理阶段列表
79
+ pipeline_config: Pipeline 配置(包含中间结果保存配置)
78
80
  progress_callback: 进度回调函数 (current, total, message) -> None
79
81
  on_error: 错误处理策略 ("stop"|"continue"|"retry")
80
82
  max_retries: 最大重试次数(on_error="retry" 时生效)
@@ -125,20 +127,46 @@ class Local(BaseAPI):
125
127
 
126
128
  # 调用 Pipeline API
127
129
  from .pipeline import PipelineAPI
130
+
128
131
  pipeline_api = PipelineAPI(self._config, self._http)
129
132
  result = pipeline_api.execute(
130
133
  file=file_bytes,
131
134
  filename=filename,
132
135
  stages=stages,
136
+ config=pipeline_config,
133
137
  data_source=data_source,
134
138
  )
135
139
 
140
+ # 处理中间结果
141
+ if (
142
+ pipeline_config
143
+ and pipeline_config.include_intermediate_results
144
+ and pipeline_config.intermediate_results_destination
145
+ and hasattr(result, "intermediate_results")
146
+ and result.intermediate_results
147
+ ):
148
+ for stage_result in result.intermediate_results:
149
+ stage_name = stage_result.get("stage")
150
+ elements = stage_result.get("elements", [])
151
+ if stage_name and elements:
152
+ metadata_with_stage = {
153
+ "filename": filename,
154
+ "file_path": file_path,
155
+ "stage": stage_name,
156
+ }
157
+ if result.stats and hasattr(result.stats, "record_id"):
158
+ metadata_with_stage["record_id"] = result.stats.record_id
159
+
160
+ pipeline_config.intermediate_results_destination.write(
161
+ elements, metadata_with_stage
162
+ )
163
+
136
164
  # 准备写入数据
137
165
  # 如果是向量数据库,写入 embeddings
138
- if hasattr(result, 'elements') and result.elements:
166
+ if hasattr(result, "elements") and result.elements:
139
167
  elements_data = []
140
168
  for elem in result.elements:
141
- elem_dict = elem.model_dump() if hasattr(elem, 'model_dump') else elem
169
+ elem_dict = elem.model_dump() if hasattr(elem, "model_dump") else elem
142
170
  elements_data.append(elem_dict)
143
171
  else:
144
172
  elements_data = []
@@ -148,8 +176,8 @@ class Local(BaseAPI):
148
176
  "filename": filename,
149
177
  "file_path": file_path,
150
178
  }
151
- if hasattr(result, 'record_id'):
152
- metadata["record_id"] = result.record_id
179
+ if result.stats and hasattr(result.stats, "record_id"):
180
+ metadata["record_id"] = result.stats.record_id
153
181
 
154
182
  destination.write(elements_data, metadata)
155
183
 
@@ -164,11 +192,13 @@ class Local(BaseAPI):
164
192
  else:
165
193
  # continue: 记录失败并继续下一个文件
166
194
  failed += 1
167
- failed_files.append(FailedFile(
168
- file_path=file_path,
169
- error=error_msg,
170
- retry_count=0, # HTTP客户端内部已经处理了重试
171
- ))
195
+ failed_files.append(
196
+ FailedFile(
197
+ file_path=file_path,
198
+ error=error_msg,
199
+ retry_count=0, # HTTP客户端内部已经处理了重试
200
+ )
201
+ )
172
202
 
173
203
  # 计算总耗时
174
204
  duration = time.time() - start_time
@@ -120,7 +120,9 @@ class PipelineAPI(BaseAPI):
120
120
  }
121
121
 
122
122
  if config:
123
- data["config"] = json.dumps(config.model_dump(), ensure_ascii=False)
123
+ # 只序列化 API 需要的字段(排除 intermediate_results_destination)
124
+ config_dict = config.model_dump(exclude={"intermediate_results_destination"})
125
+ data["config"] = json.dumps(config_dict, ensure_ascii=False)
124
126
  if data_source:
125
127
  data["data_source"] = json.dumps(data_source, ensure_ascii=False)
126
128
 
@@ -99,11 +99,13 @@ class PipelineConfig(BaseModel):
99
99
 
100
100
  Attributes:
101
101
  include_intermediate_results: 是否包含中间结果
102
+ intermediate_results_destination: 中间结果保存目的地(仅支持 LocalDestination)
102
103
  """
103
104
 
104
105
  include_intermediate_results: bool = False
106
+ intermediate_results_destination: Any = None # LocalDestination 对象
105
107
 
106
- model_config = {"extra": "allow"}
108
+ model_config = {"extra": "allow", "arbitrary_types_allowed": True}
107
109
 
108
110
 
109
111
  class PipelineResponse(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.3.0b1
3
+ Version: 0.3.0b3
4
4
  Summary: 面向 Agent 和 RAG 的文档处理 Pipeline 客户端
5
5
  Author-email: INTSIG-TEXTIN <support@textin.com>
6
6
  License-Expression: MIT
File without changes