xparse-client 0.2.11__py3-none-any.whl → 0.3.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +179 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +350 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +215 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +136 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b3.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.11.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +1 -0
  69. example/run_pipeline.py +0 -506
  70. example/run_pipeline_test.py +0 -458
  71. xparse_client/pipeline/__init__.py +0 -3
  72. xparse_client/pipeline/config.py +0 -129
  73. xparse_client/pipeline/destinations.py +0 -487
  74. xparse_client/pipeline/pipeline.py +0 -622
  75. xparse_client/pipeline/sources.py +0 -585
  76. xparse_client-0.2.11.dist-info/METADATA +0 -1050
  77. xparse_client-0.2.11.dist-info/RECORD +0 -13
@@ -0,0 +1,259 @@
1
+ """SMB/CIFS 数据源(懒加载 pysmb)"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from datetime import datetime, timezone
7
+ from io import BytesIO
8
+ from typing import Any
9
+
10
+ from ...exceptions import SourceError
11
+ from .._utils import match_file_pattern, normalize_wildcard_patterns, to_millis_timestamp
12
+ from .base import Source
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def _get_smb_connection():
18
+ """懒加载 pysmb
19
+
20
+ Returns:
21
+ SMBConnection 类
22
+
23
+ Raises:
24
+ ImportError: pysmb 未安装
25
+ """
26
+ try:
27
+ from smb.SMBConnection import SMBConnection
28
+
29
+ return SMBConnection
30
+ except ImportError as e:
31
+ raise ImportError(
32
+ "使用 SmbSource 需要安装 pysmb: pip install xparse-client[smb]"
33
+ ) from e
34
+
35
+
36
class SmbSource(Source):
    """SMB/CIFS data source.

    Reads files from an SMB/CIFS share. pysmb is imported lazily, when an
    instance is created, and the connection is opened in ``__init__``.

    Attributes:
        host: SMB host.
        share_name: Name of the share.
        path: Path inside the share, stripped of leading/trailing separators.
        pattern: File-matching patterns, as returned by
            ``normalize_wildcard_patterns``.
        recursive: Whether sub-directories are traversed when listing.

    Example:
        >>> source = SmbSource(
        ...     host="192.168.1.100",
        ...     share_name="documents",
        ...     username="user",
        ...     password="pass",
        ...     path="reports/",
        ...     pattern=["*.pdf"],
        ... )
        >>> files = source.list_files()
    """

    def __init__(
        self,
        host: str,
        share_name: str,
        username: str,
        password: str,
        domain: str = "",
        port: int = 445,
        path: str = "",
        pattern: list[str] | None = None,
        recursive: bool = False,
    ) -> None:
        """Create the source and open the SMB connection.

        Args:
            host: SMB host address.
            share_name: Name of the share.
            username: User name.
            password: Password (used for the connection only, not stored on
                the instance).
            domain: Domain name (optional).
            port: SMB port, 445 by default.
            path: Path inside the share (optional).
            pattern: List of file-matching patterns.
            recursive: Whether to recurse into sub-directories; False by default.

        Raises:
            ImportError: pysmb is not installed.
            SourceError: The connection could not be established or the
                server rejected the credentials.
        """
        SMBConnection = _get_smb_connection()

        self.host = host
        self.share_name = share_name
        self.username = username
        self.domain = domain
        self.port = port
        self.path = path.strip("/").strip("\\") if path else ""
        self.pattern = normalize_wildcard_patterns(pattern)
        self.recursive = recursive
        # Defined up front so close() is safe even if connecting fails.
        self.conn = None

        try:
            connection = SMBConnection(
                username,
                password,
                "",  # my_name
                host,
                domain=domain,
                use_ntlm_v2=True,
                # Port 445 is direct-hosted SMB over TCP; pysmb defaults to
                # NetBIOS session framing (port 139) unless told otherwise.
                is_direct_tcp=(port == 445),
            )
            # connect() returns False (rather than raising) when the server
            # rejects the credentials.
            authenticated = connection.connect(host, port)
        except Exception as e:
            raise SourceError(
                f"SMB 连接失败: {e}",
                connector_type="smb",
                operation="connect",
                details={"host": host, "share_name": share_name},
            ) from e

        if not authenticated:
            # Release the socket before reporting the failure.
            try:
                connection.close()
            except Exception as e:
                logger.debug(f"SMB 关闭连接失败: {e}")
            raise SourceError(
                "SMB 连接失败: 认证未通过",
                connector_type="smb",
                operation="connect",
                details={"host": host, "share_name": share_name},
            )

        self.conn = connection
        logger.info(f"SMB 连接成功: {host}/{share_name}")

    def list_files(self) -> list[str]:
        """List the files available on the share.

        Returns:
            File paths relative to the configured ``path``.

        Raises:
            SourceError: Listing failed. Note that per-directory listing
                errors are logged and skipped by ``_list_recursive`` rather
                than raised.
        """
        try:
            files = []
            base_path = "/" if not self.path else f"/{self.path}"

            self._list_recursive(base_path, base_path, files)

            logger.info(f"SMB 找到 {len(files)} 个文件")
            return files

        except SourceError:
            raise
        except Exception as e:
            raise SourceError(
                f"列出 SMB 文件失败: {e}",
                connector_type="smb",
                operation="list_files",
            ) from e

    def _list_recursive(
        self, current_path: str, base_path: str, files: list[str]
    ) -> None:
        """Collect matching files under ``current_path`` into ``files``.

        Args:
            current_path: Directory (inside the share) to list.
            base_path: Root of the listing; collected paths are made relative
                to it.
            files: Output list, extended in place.

        Sub-directories are only descended into when ``self.recursive`` is
        set. Any error while listing ``current_path`` is logged as a warning
        and that directory is skipped.
        """
        try:
            items = self.conn.listPath(self.share_name, current_path)

            for item in items:
                # Skips "." and ".." as well as every hidden (dot-prefixed) entry
                if item.filename.startswith("."):
                    continue

                item_path = (
                    f"{current_path.rstrip('/')}/{item.filename}"
                    if current_path != "/"
                    else f"/{item.filename}"
                )

                # Path relative to the listing root
                relative_path = item_path[len(base_path) :].lstrip("/")

                if item.isDirectory:
                    # Sub-directories are ignored in non-recursive mode
                    if self.recursive:
                        self._list_recursive(item_path, base_path, files)
                elif match_file_pattern(relative_path, self.pattern):
                    files.append(relative_path)

        except Exception as e:
            logger.warning(f"SMB 列出路径失败 {current_path}: {e}")

    def read_file(self, file_path: str) -> tuple[bytes, dict[str, Any]]:
        """Read one file from the share.

        Args:
            file_path: File path relative to the configured ``path``.

        Returns:
            A ``(content, data_source)`` tuple, where ``data_source`` holds
            the ``smb://`` URL, version/created/modified values produced by
            ``to_millis_timestamp``, and a ``record_locator`` dict.

        Raises:
            SourceError: The file could not be read.
        """
        base_path = "/" if not self.path else f"/{self.path}"
        full_path = (
            f"{base_path.rstrip('/')}/{file_path.lstrip('/')}"
            if base_path != "/"
            else f"/{file_path.lstrip('/')}"
        )

        try:
            file_obj = BytesIO()
            self.conn.retrieveFile(self.share_name, full_path, file_obj)

            # File attributes are best-effort: a failure here only leaves the
            # timestamps unset, it does not fail the read.
            date_created = None
            date_modified = None
            try:
                attrs = self.conn.getAttributes(self.share_name, full_path)
                date_created = self._to_timestamp(getattr(attrs, "create_time", None))
                date_modified = self._to_timestamp(
                    getattr(attrs, "last_write_time", None)
                )
            except Exception as e:
                logger.debug(f"SMB 获取文件属性失败 {full_path}: {e}")

            smb_url = f"smb://{self.host}/{self.share_name}{full_path}"
            data_source = {
                "url": smb_url,
                "version": to_millis_timestamp(date_modified),
                "date_created": to_millis_timestamp(date_created),
                "date_modified": to_millis_timestamp(date_modified),
                "record_locator": {
                    "server": self.host,
                    "share": self.share_name,
                    "protocol": "smb",
                    "remote_file_path": full_path,
                },
            }

            file_obj.seek(0)
            return file_obj.read(), data_source

        except Exception as e:
            raise SourceError(
                f"读取 SMB 文件失败: {file_path}, {e}",
                connector_type="smb",
                operation="read_file",
            ) from e

    @staticmethod
    def _to_timestamp(value: Any) -> float | None:
        """Convert a time value to a Unix timestamp.

        ``datetime`` values are converted to UTC first; ints and floats are
        returned as floats; anything else (including ``None``) yields ``None``.
        """
        if value is None:
            return None
        if isinstance(value, datetime):
            return value.astimezone(timezone.utc).timestamp()
        if isinstance(value, (int, float)):
            return float(value)
        return None

    def close(self) -> None:
        """Close the SMB connection (best effort, never raises)."""
        # getattr: __init__ may have failed before ``conn`` was assigned.
        conn = getattr(self, "conn", None)
        if not conn:
            return
        try:
            conn.close()
        except Exception as e:
            # Closing is best-effort, but leave a trace for debugging.
            logger.debug(f"SMB 关闭连接失败: {e}")

    def __repr__(self) -> str:
        return f"<SmbSource host={self.host} share={self.share_name}>"
257
+
258
+
259
+ __all__ = ["SmbSource"]
@@ -0,0 +1,398 @@
1
+ """统一异常类层次结构
2
+
3
+ 提供结构化的异常类型,便于错误处理和排查。
4
+
5
+ Example:
6
+ >>> from xparse_client.exceptions import APIError, AuthenticationError
7
+ >>>
8
+ >>> try:
9
+ ... result = client.parse.partition(file=file_bytes, filename="doc.pdf")
10
+ ... except AuthenticationError as e:
11
+ ... print(f"认证失败: {e.message}, request_id: {e.request_id}")
12
+ ... except APIError as e:
13
+ ... print(f"API 错误: {e}")
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any
19
+
20
+
21
class XParseClientError(Exception):
    """Base exception of the xParse SDK.

    Every SDK exception derives from this class, which provides a uniform
    structure (message plus a details mapping).

    Attributes:
        message: Error message.
        details: Additional error details; an empty dict when none were given.
    """

    def __init__(
        self,
        message: str,
        *,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.message = message
        # Store a copy: the exception must not change if the caller later
        # mutates (or reuses) the dict it passed in.
        self.details = dict(details) if details else {}
        super().__init__(message)

    def __str__(self) -> str:
        if self.details:
            details_str = ", ".join(f"{k}={v}" for k, v in self.details.items())
            return f"{self.message} ({details_str})"
        return self.message

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(message={self.message!r}, details={self.details!r})"

    def to_dict(self) -> dict[str, Any]:
        """Return the error as a plain dict, convenient for serialization."""
        return {
            "error_type": self.__class__.__name__,
            "message": self.message,
            "details": self.details,
        }
57
+
58
+
59
+ # ============================================================
60
+ # 配置和验证错误
61
+ # ============================================================
62
+
63
+
64
class ConfigurationError(XParseClientError):
    """Configuration error.

    Raised when the SDK is configured incorrectly.

    Example:
        >>> raise ConfigurationError(
        ...     "缺少必要的配置参数",
        ...     details={"missing_params": ["app_id", "secret_code"]}
        ... )
    """
77
+
78
+
79
class ValidationError(XParseClientError):
    """Input validation error.

    Raised when request parameters fail validation.

    Attributes:
        field: Name of the field that failed validation.
        value: The value that was passed in.
    """

    def __init__(
        self,
        message: str,
        *,
        field: str | None = None,
        value: Any = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.field = field
        self.value = value

        # Work on a copy so the caller's dict is not modified as a side effect.
        _details = dict(details) if details else {}
        if field:
            _details["field"] = field
        if value is not None:
            _details["value"] = str(value)[:100]  # truncate overly long values

        super().__init__(message, details=_details)
107
+
108
+
109
+ # ============================================================
110
+ # API 错误
111
+ # ============================================================
112
+
113
+
114
class APIError(XParseClientError):
    """Base class for API errors.

    Base class of all HTTP-API related errors.

    Attributes:
        message: Error message.
        status_code: HTTP status code.
        request_id: Request ID (useful for troubleshooting).
        response_body: Raw response body.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int | None = None,
        request_id: str | None = None,
        response_body: str | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.status_code = status_code
        self.request_id = request_id
        self.response_body = response_body

        # Work on a copy so the caller's dict is not modified as a side effect.
        _details = dict(details) if details else {}
        if status_code:
            _details["status_code"] = status_code
        if request_id:
            _details["request_id"] = request_id

        super().__init__(message, details=_details)

    def __str__(self) -> str:
        # Unlike the base class, only the status code and request ID are
        # appended; other entries of ``details`` are not rendered.
        parts = [self.message]
        if self.status_code:
            parts.append(f"status={self.status_code}")
        if self.request_id:
            parts.append(f"request_id={self.request_id}")
        return " ".join(parts)
154
+
155
+
156
class AuthenticationError(APIError):
    """Authentication error (401).

    Raised when the API key is invalid or has expired.

    Example:
        >>> raise AuthenticationError(
        ...     "API 密钥无效",
        ...     status_code=401,
        ...     request_id="req-123"
        ... )
    """
170
+
171
+
172
class PermissionDeniedError(APIError):
    """Permission error (403).

    Raised when access to the requested resource is not permitted.
    """
179
+
180
+
181
class NotFoundError(APIError):
    """Resource-not-found error (404).

    Raised when the requested resource does not exist.

    Attributes:
        resource_type: Type of the resource (e.g. job, workflow).
        resource_id: ID of the resource.
    """

    def __init__(
        self,
        message: str,
        *,
        resource_type: str | None = None,
        resource_id: str | None = None,
        **kwargs: Any,
    ) -> None:
        self.resource_type = resource_type
        self.resource_id = resource_id

        # Copy the caller's dict (if any) so it is not modified as a side effect.
        details = dict(kwargs.pop("details", None) or {})
        if resource_type:
            details["resource_type"] = resource_type
        if resource_id:
            details["resource_id"] = resource_id

        # Remaining kwargs (status_code, request_id, response_body) go to APIError.
        super().__init__(message, details=details, **kwargs)
209
+
210
+
211
class RateLimitError(APIError):
    """Rate-limit error (429).

    Raised when requests are sent too frequently.

    Attributes:
        retry_after: Suggested wait before retrying, in seconds.
    """

    def __init__(
        self,
        message: str,
        *,
        retry_after: int | None = None,
        **kwargs: Any,
    ) -> None:
        self.retry_after = retry_after

        # Copy the caller's dict (if any) so it is not modified as a side effect.
        details = dict(kwargs.pop("details", None) or {})
        # ``is not None`` rather than truthiness: a retry_after of 0 is a real
        # value ("retry immediately") and must still be recorded.
        if retry_after is not None:
            details["retry_after"] = retry_after

        # Remaining kwargs (status_code, request_id, response_body) go to APIError.
        super().__init__(message, details=details, **kwargs)
234
+
235
+
236
class ServerError(APIError):
    """Server error (5xx).

    Raised on internal server errors.
    """
243
+
244
+
245
class RequestTimeoutError(APIError):
    """Request timeout error.

    Raised when a request times out.

    Attributes:
        timeout_seconds: The timeout setting that was in effect.
    """

    def __init__(
        self,
        message: str,
        *,
        timeout_seconds: float | None = None,
        **kwargs: Any,
    ) -> None:
        self.timeout_seconds = timeout_seconds

        # Copy the caller's dict (if any) so it is not modified as a side effect.
        details = dict(kwargs.pop("details", None) or {})
        # ``is not None`` rather than truthiness so an explicit 0 is recorded too.
        if timeout_seconds is not None:
            details["timeout_seconds"] = timeout_seconds

        # Remaining kwargs (status_code, request_id, response_body) go to APIError.
        super().__init__(message, details=details, **kwargs)
268
+
269
+
270
+ # ============================================================
271
+ # 连接器错误
272
+ # ============================================================
273
+
274
+
275
class ConnectorError(XParseClientError):
    """Base class for connector errors.

    Base class of errors raised by Source and Destination operations.

    Attributes:
        connector_type: Connector type (e.g. s3, local, milvus).
        operation: Operation type (e.g. list, read, write).
    """

    def __init__(
        self,
        message: str,
        *,
        connector_type: str | None = None,
        operation: str | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.connector_type = connector_type
        self.operation = operation

        # Work on a copy so the caller's dict is not modified as a side effect.
        _details = dict(details) if details else {}
        if connector_type:
            _details["connector_type"] = connector_type
        if operation:
            _details["operation"] = operation

        super().__init__(message, details=_details)
303
+
304
+
305
class SourceError(ConnectorError):
    """Data source error.

    Raised when reading from a data source fails.

    Example:
        >>> raise SourceError(
        ...     "无法连接到 S3 存储桶",
        ...     connector_type="s3",
        ...     operation="connect",
        ...     details={"bucket": "my-bucket"}
        ... )
    """
320
+
321
+
322
class DestinationError(ConnectorError):
    """Destination error.

    Raised when writing to a destination fails.

    Example:
        >>> raise DestinationError(
        ...     "写入 Milvus 失败",
        ...     connector_type="milvus",
        ...     operation="write",
        ...     details={"collection": "my_collection"}
        ... )
    """
337
+
338
+
339
+ # ============================================================
340
+ # Pipeline 错误
341
+ # ============================================================
342
+
343
+
344
class PipelineError(XParseClientError):
    """Pipeline execution error.

    Raised for errors that occur while a pipeline is running.

    Attributes:
        stage: Stage in which the error occurred (e.g. parse, chunk, embed).
        filename: Name of the file being processed.
    """

    def __init__(
        self,
        message: str,
        *,
        stage: str | None = None,
        filename: str | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        self.stage = stage
        self.filename = filename

        # Work on a copy so the caller's dict is not modified as a side effect.
        _details = dict(details) if details else {}
        if stage:
            _details["stage"] = stage
        if filename:
            _details["filename"] = filename

        super().__init__(message, details=_details)
372
+
373
+
374
+ # ============================================================
375
+ # 导出
376
+ # ============================================================
377
+
378
# Public names exported by the exceptions module.
__all__ = [
    # Base class
    "XParseClientError",
    # Configuration and validation
    "ConfigurationError",
    "ValidationError",
    # API errors
    "APIError",
    "AuthenticationError",
    "PermissionDeniedError",
    "NotFoundError",
    "RateLimitError",
    "ServerError",
    "RequestTimeoutError",
    # Connector errors
    "ConnectorError",
    "SourceError",
    "DestinationError",
    # Pipeline errors
    "PipelineError",
]
@@ -0,0 +1,60 @@
1
+ """数据模型模块
2
+
3
+ 提供 API 请求和响应的 Pydantic 数据模型。
4
+ """
5
+
6
+ from .chunk import ChunkConfig
7
+ from .embed import EmbedConfig
8
+ from .extract import ExtractConfig
9
+ from .local import FailedFile, WorkflowResult
10
+ from .parse import (
11
+ AsyncJobResponse,
12
+ Element,
13
+ ElementMetadata,
14
+ JobStatusResponse,
15
+ ParseConfig,
16
+ ParseResponse,
17
+ )
18
+ from .pipeline import (
19
+ ChunkStage,
20
+ EmbedStage,
21
+ ExtractStage,
22
+ ParseStage,
23
+ PipelineConfig,
24
+ PipelineResponse,
25
+ PipelineStage,
26
+ PipelineStats,
27
+ )
28
+ from .workflows import Schedule, WorkflowInformation, WorkflowState
29
+
30
# Public names re-exported by the models package.
__all__ = [
    # Parse models
    "ParseConfig",
    "Element",
    "ElementMetadata",
    "ParseResponse",
    "AsyncJobResponse",
    "JobStatusResponse",
    # Extract models
    "ExtractConfig",
    # Chunk models
    "ChunkConfig",
    # Embed models
    "EmbedConfig",
    # Pipeline models
    "ParseStage",
    "ChunkStage",
    "EmbedStage",
    "ExtractStage",
    "PipelineStage",
    "PipelineStats",
    "PipelineConfig",
    "PipelineResponse",
    # Local models
    "FailedFile",
    "WorkflowResult",
    # Workflows models
    "WorkflowInformation",
    "WorkflowState",
    "Schedule",
]