xparse-client 0.3.0b1__tar.gz → 0.3.0b3__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xparse_client-0.3.0b1/xparse_client.egg-info → xparse_client-0.3.0b3}/PKG-INFO +1 -1
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/pyproject.toml +1 -1
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/local.py +40 -10
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/pipeline.py +3 -1
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/pipeline.py +3 -1
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3/xparse_client.egg-info}/PKG-INFO +1 -1
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/CHANGELOG.md +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/LICENSE +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/MANIFEST.in +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/README.md +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/1_basic_api_usage.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/2_async_job.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/3_local_workflow.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/4_advanced_workflow.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/README.md +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/example/config_example.json +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/setup.cfg +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/conftest.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_extract.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_local.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_parse.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_pipeline.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/api/test_workflows.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_ftp.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_local_connectors.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_milvus.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_qdrant.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_s3.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_smb.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_utils.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/models/test_local.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/models/test_pipeline_stages.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/models/test_workflows.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_base.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_client.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_config.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_exceptions.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/test_http.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_base.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_client.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_config.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/_http.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/extract.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/parse.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/api/workflows.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/_utils.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/base.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/local.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/milvus.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/qdrant.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/s3.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/base.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/ftp.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/local.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/s3.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/smb.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/exceptions.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/__init__.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/chunk.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/embed.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/extract.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/local.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/parse.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/models/workflows.py +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/SOURCES.txt +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/dependency_links.txt +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/requires.txt +0 -0
- {xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client.egg-info/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ from typing import TYPE_CHECKING, Callable, Literal
|
|
|
28
28
|
|
|
29
29
|
from .._base import BaseAPI
|
|
30
30
|
from ..models.local import FailedFile, WorkflowResult
|
|
31
|
-
from ..models.pipeline import PipelineStage
|
|
31
|
+
from ..models.pipeline import PipelineConfig, PipelineStage
|
|
32
32
|
|
|
33
33
|
if TYPE_CHECKING:
|
|
34
34
|
pass
|
|
@@ -55,6 +55,7 @@ class Local(BaseAPI):
|
|
|
55
55
|
destination, # Union[LocalDestination, S3Destination, MilvusDestination, QdrantDestination]
|
|
56
56
|
stages: list[PipelineStage],
|
|
57
57
|
*,
|
|
58
|
+
pipeline_config: PipelineConfig | None = None,
|
|
58
59
|
progress_callback: Callable[[int, int, str], None] | None = None,
|
|
59
60
|
on_error: Literal["stop", "continue", "retry"] = "stop",
|
|
60
61
|
max_retries: int = 3,
|
|
@@ -75,6 +76,7 @@ class Local(BaseAPI):
|
|
|
75
76
|
source: 数据源(本地 Connector 对象)
|
|
76
77
|
destination: 输出目的地(本地 Connector 对象)
|
|
77
78
|
stages: 处理阶段列表
|
|
79
|
+
pipeline_config: Pipeline 配置(包含中间结果保存配置)
|
|
78
80
|
progress_callback: 进度回调函数 (current, total, message) -> None
|
|
79
81
|
on_error: 错误处理策略 ("stop"|"continue"|"retry")
|
|
80
82
|
max_retries: 最大重试次数(on_error="retry" 时生效)
|
|
@@ -125,20 +127,46 @@ class Local(BaseAPI):
|
|
|
125
127
|
|
|
126
128
|
# 调用 Pipeline API
|
|
127
129
|
from .pipeline import PipelineAPI
|
|
130
|
+
|
|
128
131
|
pipeline_api = PipelineAPI(self._config, self._http)
|
|
129
132
|
result = pipeline_api.execute(
|
|
130
133
|
file=file_bytes,
|
|
131
134
|
filename=filename,
|
|
132
135
|
stages=stages,
|
|
136
|
+
config=pipeline_config,
|
|
133
137
|
data_source=data_source,
|
|
134
138
|
)
|
|
135
139
|
|
|
140
|
+
# 处理中间结果
|
|
141
|
+
if (
|
|
142
|
+
pipeline_config
|
|
143
|
+
and pipeline_config.include_intermediate_results
|
|
144
|
+
and pipeline_config.intermediate_results_destination
|
|
145
|
+
and hasattr(result, "intermediate_results")
|
|
146
|
+
and result.intermediate_results
|
|
147
|
+
):
|
|
148
|
+
for stage_result in result.intermediate_results:
|
|
149
|
+
stage_name = stage_result.get("stage")
|
|
150
|
+
elements = stage_result.get("elements", [])
|
|
151
|
+
if stage_name and elements:
|
|
152
|
+
metadata_with_stage = {
|
|
153
|
+
"filename": filename,
|
|
154
|
+
"file_path": file_path,
|
|
155
|
+
"stage": stage_name,
|
|
156
|
+
}
|
|
157
|
+
if result.stats and hasattr(result.stats, "record_id"):
|
|
158
|
+
metadata_with_stage["record_id"] = result.stats.record_id
|
|
159
|
+
|
|
160
|
+
pipeline_config.intermediate_results_destination.write(
|
|
161
|
+
elements, metadata_with_stage
|
|
162
|
+
)
|
|
163
|
+
|
|
136
164
|
# 准备写入数据
|
|
137
165
|
# 如果是向量数据库,写入 embeddings
|
|
138
|
-
if hasattr(result,
|
|
166
|
+
if hasattr(result, "elements") and result.elements:
|
|
139
167
|
elements_data = []
|
|
140
168
|
for elem in result.elements:
|
|
141
|
-
elem_dict = elem.model_dump() if hasattr(elem,
|
|
169
|
+
elem_dict = elem.model_dump() if hasattr(elem, "model_dump") else elem
|
|
142
170
|
elements_data.append(elem_dict)
|
|
143
171
|
else:
|
|
144
172
|
elements_data = []
|
|
@@ -148,8 +176,8 @@ class Local(BaseAPI):
|
|
|
148
176
|
"filename": filename,
|
|
149
177
|
"file_path": file_path,
|
|
150
178
|
}
|
|
151
|
-
if hasattr(result,
|
|
152
|
-
metadata["record_id"] = result.record_id
|
|
179
|
+
if result.stats and hasattr(result.stats, "record_id"):
|
|
180
|
+
metadata["record_id"] = result.stats.record_id
|
|
153
181
|
|
|
154
182
|
destination.write(elements_data, metadata)
|
|
155
183
|
|
|
@@ -164,11 +192,13 @@ class Local(BaseAPI):
|
|
|
164
192
|
else:
|
|
165
193
|
# continue: 记录失败并继续下一个文件
|
|
166
194
|
failed += 1
|
|
167
|
-
failed_files.append(
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
195
|
+
failed_files.append(
|
|
196
|
+
FailedFile(
|
|
197
|
+
file_path=file_path,
|
|
198
|
+
error=error_msg,
|
|
199
|
+
retry_count=0, # HTTP客户端内部已经处理了重试
|
|
200
|
+
)
|
|
201
|
+
)
|
|
172
202
|
|
|
173
203
|
# 计算总耗时
|
|
174
204
|
duration = time.time() - start_time
|
|
@@ -120,7 +120,9 @@ class PipelineAPI(BaseAPI):
|
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
if config:
|
|
123
|
-
|
|
123
|
+
# 只序列化 API 需要的字段(排除 intermediate_results_destination)
|
|
124
|
+
config_dict = config.model_dump(exclude={"intermediate_results_destination"})
|
|
125
|
+
data["config"] = json.dumps(config_dict, ensure_ascii=False)
|
|
124
126
|
if data_source:
|
|
125
127
|
data["data_source"] = json.dumps(data_source, ensure_ascii=False)
|
|
126
128
|
|
|
@@ -99,11 +99,13 @@ class PipelineConfig(BaseModel):
|
|
|
99
99
|
|
|
100
100
|
Attributes:
|
|
101
101
|
include_intermediate_results: 是否包含中间结果
|
|
102
|
+
intermediate_results_destination: 中间结果保存目的地(仅支持 LocalDestination)
|
|
102
103
|
"""
|
|
103
104
|
|
|
104
105
|
include_intermediate_results: bool = False
|
|
106
|
+
intermediate_results_destination: Any = None # LocalDestination 对象
|
|
105
107
|
|
|
106
|
-
model_config = {"extra": "allow"}
|
|
108
|
+
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
class PipelineResponse(BaseModel):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/tests/unit/connectors/test_local_connectors.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/__init__.py
RENAMED
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/base.py
RENAMED
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/local.py
RENAMED
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/milvus.py
RENAMED
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/destinations/qdrant.py
RENAMED
|
File without changes
|
|
File without changes
|
{xparse_client-0.3.0b1 → xparse_client-0.3.0b3}/xparse_client/connectors/sources/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|