xparse-client 0.2.20__py3-none-any.whl → 0.3.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- example/1_basic_api_usage.py +198 -0
- example/2_async_job.py +210 -0
- example/3_local_workflow.py +300 -0
- example/4_advanced_workflow.py +327 -0
- example/README.md +128 -0
- example/config_example.json +95 -0
- tests/conftest.py +310 -0
- tests/unit/__init__.py +1 -0
- tests/unit/api/__init__.py +1 -0
- tests/unit/api/test_extract.py +232 -0
- tests/unit/api/test_local.py +231 -0
- tests/unit/api/test_parse.py +374 -0
- tests/unit/api/test_pipeline.py +369 -0
- tests/unit/api/test_workflows.py +108 -0
- tests/unit/connectors/test_ftp.py +525 -0
- tests/unit/connectors/test_local_connectors.py +324 -0
- tests/unit/connectors/test_milvus.py +368 -0
- tests/unit/connectors/test_qdrant.py +399 -0
- tests/unit/connectors/test_s3.py +598 -0
- tests/unit/connectors/test_smb.py +442 -0
- tests/unit/connectors/test_utils.py +335 -0
- tests/unit/models/test_local.py +54 -0
- tests/unit/models/test_pipeline_stages.py +144 -0
- tests/unit/models/test_workflows.py +55 -0
- tests/unit/test_base.py +437 -0
- tests/unit/test_client.py +110 -0
- tests/unit/test_config.py +160 -0
- tests/unit/test_exceptions.py +182 -0
- tests/unit/test_http.py +562 -0
- xparse_client/__init__.py +110 -20
- xparse_client/_base.py +179 -0
- xparse_client/_client.py +218 -0
- xparse_client/_config.py +221 -0
- xparse_client/_http.py +350 -0
- xparse_client/api/__init__.py +14 -0
- xparse_client/api/extract.py +109 -0
- xparse_client/api/local.py +185 -0
- xparse_client/api/parse.py +209 -0
- xparse_client/api/pipeline.py +132 -0
- xparse_client/api/workflows.py +204 -0
- xparse_client/connectors/__init__.py +45 -0
- xparse_client/connectors/_utils.py +138 -0
- xparse_client/connectors/destinations/__init__.py +45 -0
- xparse_client/connectors/destinations/base.py +116 -0
- xparse_client/connectors/destinations/local.py +91 -0
- xparse_client/connectors/destinations/milvus.py +229 -0
- xparse_client/connectors/destinations/qdrant.py +238 -0
- xparse_client/connectors/destinations/s3.py +163 -0
- xparse_client/connectors/sources/__init__.py +45 -0
- xparse_client/connectors/sources/base.py +74 -0
- xparse_client/connectors/sources/ftp.py +278 -0
- xparse_client/connectors/sources/local.py +176 -0
- xparse_client/connectors/sources/s3.py +232 -0
- xparse_client/connectors/sources/smb.py +259 -0
- xparse_client/exceptions.py +398 -0
- xparse_client/models/__init__.py +60 -0
- xparse_client/models/chunk.py +39 -0
- xparse_client/models/embed.py +62 -0
- xparse_client/models/extract.py +41 -0
- xparse_client/models/local.py +38 -0
- xparse_client/models/parse.py +136 -0
- xparse_client/models/pipeline.py +132 -0
- xparse_client/models/workflows.py +74 -0
- xparse_client-0.3.0b1.dist-info/METADATA +1075 -0
- xparse_client-0.3.0b1.dist-info/RECORD +68 -0
- {xparse_client-0.2.20.dist-info → xparse_client-0.3.0b1.dist-info}/WHEEL +1 -1
- {xparse_client-0.2.20.dist-info → xparse_client-0.3.0b1.dist-info}/licenses/LICENSE +1 -1
- {xparse_client-0.2.20.dist-info → xparse_client-0.3.0b1.dist-info}/top_level.txt +2 -0
- xparse_client/pipeline/__init__.py +0 -3
- xparse_client/pipeline/config.py +0 -163
- xparse_client/pipeline/destinations.py +0 -489
- xparse_client/pipeline/pipeline.py +0 -860
- xparse_client/pipeline/sources.py +0 -583
- xparse_client-0.2.20.dist-info/METADATA +0 -1050
- xparse_client-0.2.20.dist-info/RECORD +0 -11
xparse_client/_base.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""API 模块基类
|
|
2
|
+
|
|
3
|
+
提供所有 API 模块的基类,封装通用的 HTTP 请求和响应处理逻辑。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from abc import ABC
|
|
9
|
+
from typing import TYPE_CHECKING, Any, TypeVar
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from ._config import SDKConfiguration
|
|
15
|
+
from ._http import HTTPClient
|
|
16
|
+
|
|
17
|
+
T = TypeVar("T")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseAPI(ABC):  # noqa: B024
    """Base class shared by every API module.

    Wraps the HTTP client so that subclasses issue requests with paths
    relative to ``_base_path`` and turn JSON responses into model instances.

    Attributes:
        _base_path: Path prefix prepended to every request path.
        _config: SDK configuration handed over at construction time.
        _http: Synchronous HTTP client used to send the requests.
    """

    _base_path: str = "/api/xparse"

    def __init__(
        self,
        config: SDKConfiguration,
        http_client: HTTPClient,
    ) -> None:
        """Initialize the API module.

        Args:
            config: SDK configuration.
            http_client: Synchronous HTTP client.
        """
        self._config = config
        self._http = http_client

    # ========== Synchronous request methods ==========

    def _request(
        self,
        method: str,
        path: str,
        *,
        json: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
        timeout: float | None = None,
    ) -> httpx.Response:
        """Send a synchronous HTTP request.

        Args:
            method: HTTP method.
            path: Path relative to ``_base_path``.
            json: JSON request body.
            data: Form data.
            files: File data.
            params: Query parameters.
            headers: Extra request headers.
            timeout: Request timeout.

        Returns:
            Whatever ``self._http.request`` returns (an ``httpx.Response``).
        """
        # A relative path such as "jobs" would otherwise be glued directly
        # onto the prefix ("/api/xparsejobs"); insert the missing separator.
        # Empty paths, "/..." paths and "?..." query suffixes stay untouched.
        if (
            path
            and not path.startswith(("/", "?"))
            and not self._base_path.endswith("/")
        ):
            path = f"/{path}"

        return self._http.request(
            method,
            f"{self._base_path}{path}",
            json=json,
            data=data,
            files=files,
            params=params,
            headers=headers,
            timeout=timeout,
        )

    def _get(self, path: str, **kwargs: Any) -> httpx.Response:
        """Send a GET request."""
        return self._request("GET", path, **kwargs)

    def _post(self, path: str, **kwargs: Any) -> httpx.Response:
        """Send a POST request."""
        return self._request("POST", path, **kwargs)

    def _put(self, path: str, **kwargs: Any) -> httpx.Response:
        """Send a PUT request."""
        return self._request("PUT", path, **kwargs)

    def _delete(self, path: str, **kwargs: Any) -> httpx.Response:
        """Send a DELETE request."""
        return self._request("DELETE", path, **kwargs)

    # ========== Response handling ==========

    @staticmethod
    def _unwrap_envelope(payload: Any) -> Any:
        """Return ``payload["data"]`` for an enveloped body, else ``payload``.

        A body counts as enveloped when it is a dict that has a ``"data"`` key.
        """
        if isinstance(payload, dict) and "data" in payload:
            return payload["data"]
        return payload

    def _parse_response(
        self,
        response: httpx.Response,
        model_class: type[T],
    ) -> T:
        """Parse a response into a Pydantic model.

        Two response formats are supported:
        - Standard format: {"code": 200, "data": {...}}
        - Direct return: {...}

        Args:
            response: HTTP response.
            model_class: Pydantic model class.

        Returns:
            The model instance built by ``model_class.model_validate``.
        """
        return model_class.model_validate(self._unwrap_envelope(response.json()))

    def _parse_list_response(
        self,
        response: httpx.Response,
        model_class: type[T],
    ) -> list[T]:
        """Parse a response into a list of models.

        Args:
            response: HTTP response.
            model_class: Pydantic model class.

        Returns:
            A list of model instances. A single non-list payload is treated
            as a one-element list; a null payload yields an empty list.
        """
        items = self._unwrap_envelope(response.json())

        # A null payload means "no items"; wrapping it as [None] would only
        # produce a validation failure on the None element.
        if items is None:
            return []

        # Make sure we iterate over a list.
        if not isinstance(items, list):
            items = [items]

        return [model_class.model_validate(item) for item in items]

    def _parse_raw_response(self, response: httpx.Response) -> dict[str, Any]:
        """Parse a response into raw JSON data.

        Args:
            response: HTTP response.

        Returns:
            The ``"data"`` member of an enveloped body, otherwise the decoded
            JSON body as-is.
        """
        return self._unwrap_envelope(response.json())
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
__all__ = ["BaseAPI"]
|
xparse_client/_client.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""XParseClient SDK 主入口
|
|
2
|
+
|
|
3
|
+
提供 SDK 的统一入口类,支持懒加载 API 模块。
|
|
4
|
+
|
|
5
|
+
Example:
|
|
6
|
+
>>> from xparse_client import XParseClient
|
|
7
|
+
>>>
|
|
8
|
+
>>> # 创建客户端
|
|
9
|
+
>>> client = XParseClient(
|
|
10
|
+
... app_id="your-app-id",
|
|
11
|
+
... secret_code="your-secret-code"
|
|
12
|
+
... )
|
|
13
|
+
>>>
|
|
14
|
+
>>> # 同步解析
|
|
15
|
+
>>> result = client.parse.partition(file=file_bytes, filename="doc.pdf")
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import importlib
|
|
21
|
+
from typing import TYPE_CHECKING, Any
|
|
22
|
+
|
|
23
|
+
from ._config import SDKConfiguration
|
|
24
|
+
from ._http import HTTPClient
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from .api.extract import Extract
|
|
28
|
+
from .api.local import Local
|
|
29
|
+
from .api.parse import Parse
|
|
30
|
+
from .api.pipeline import PipelineAPI
|
|
31
|
+
from .api.workflows import Workflows
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class XParseClient:
    """Main entry point of the xParse SDK.

    Provides a single access point to the API modules, which are imported
    and instantiated lazily on first attribute access.

    Attributes:
        parse: Parse API (document parsing)
        extract: Extract API (information extraction)
        pipeline: Pipeline API (custom pipelines)
        local: Local API (local batch processing)
        workflows: Workflows API (remote batch processing)

    Example:
        >>> # Option 1: pass the credentials directly
        >>> client = XParseClient(
        ...     app_id="your-app-id",
        ...     secret_code="your-secret-code"
        ... )
        >>>
        >>> # Option 2: read them from environment variables
        >>> client = XParseClient.from_env()
        >>>
        >>> # Process a single file
        >>> result = client.parse.partition(file=file_bytes, filename="doc.pdf")
        >>>
        >>> # Server-side asynchronous job (not client-side async)
        >>> job = client.parse.create_async_job(file=file_bytes, filename="doc.pdf")
        >>> result = client.parse.wait_for_result(job_id=job.job_id)
    """

    # API module registry: attribute name -> (module path, class name)
    _API_MAP: dict[str, tuple] = {
        "parse": ("xparse_client.api.parse", "Parse"),
        "extract": ("xparse_client.api.extract", "Extract"),
        "pipeline": ("xparse_client.api.pipeline", "PipelineAPI"),
        "local": ("xparse_client.api.local", "Local"),
        "workflows": ("xparse_client.api.workflows", "Workflows"),
        # To be added in v2:
        # "sources": ("xparse_client.api.sources", "Sources"),
        # "destinations": ("xparse_client.api.destinations", "Destinations"),
        # "jobs": ("xparse_client.api.jobs", "Jobs"),
    }

    # Type annotations only (help IDEs offer completion); the attributes
    # themselves are resolved through __getattr__.
    parse: Parse
    extract: Extract
    pipeline: PipelineAPI
    local: Local
    workflows: Workflows

    def __init__(
        self,
        app_id: str | None = None,
        secret_code: str | None = None,
        *,
        server_url: str | None = None,
        timeout: float | None = None,
        max_retries: int | None = None,
        config: SDKConfiguration | None = None,
        http_client: HTTPClient | None = None,
    ) -> None:
        """Initialize the client.

        Args:
            app_id: TextIn application ID.
            secret_code: TextIn secret code.
            server_url: Server address (defaults to https://api.textin.com).
            timeout: Request timeout in seconds.
            max_retries: Maximum number of retries.
            config: Custom SDKConfiguration; when given, the other
                configuration arguments are ignored.
            http_client: Custom HTTPClient (intended for tests). A client
                passed in here is not closed by ``close()``.

        Raises:
            ConfigurationError: The configuration parameters are invalid.
        """
        if config is not None:
            self._config = config
        else:
            # Forward only the arguments the caller actually supplied.
            supplied = {
                "app_id": app_id,
                "secret_code": secret_code,
                "server_url": server_url,
                "timeout": timeout,
                "max_retries": max_retries,
            }
            config_kwargs: dict[str, Any] = {
                key: value for key, value in supplied.items() if value is not None
            }

            if "app_id" in config_kwargs and "secret_code" in config_kwargs:
                self._config = SDKConfiguration(**config_kwargs)
            else:
                # A credential is missing: let from_env fill it in from the
                # environment variables.
                self._config = SDKConfiguration.from_env(**config_kwargs)

        # Decide ownership and the client to use with the same `is None`
        # test; a truthiness test (`http_client or ...`) would discard a
        # caller-supplied client that happens to evaluate as falsy.
        if http_client is None:
            self._http = HTTPClient(self._config)
        else:
            self._http = http_client
        self._owns_http = http_client is None

        # Cache of instantiated API modules, keyed by attribute name.
        self._api_cache: dict[str, Any] = {}

    @classmethod
    def from_env(cls, **kwargs) -> XParseClient:
        """Create a client from environment variables.

        Environment variables:
            TEXTIN_APP_ID: application ID
            TEXTIN_SECRET_CODE: secret code
            TEXTIN_SERVER_URL: server address (optional)

        Args:
            **kwargs: Other configuration parameters, forwarded to
                ``SDKConfiguration.from_env``.

        Returns:
            An XParseClient instance.

        Example:
            >>> # After setting the environment variables
            >>> client = XParseClient.from_env()
        """
        return cls(config=SDKConfiguration.from_env(**kwargs))

    def __getattr__(self, name: str) -> Any:
        """Lazily load an API module.

        Accessing an API attribute (e.g. ``client.parse``) imports the
        matching module, instantiates its API class with the client's
        configuration and HTTP client, and caches the instance.

        Args:
            name: Attribute name.

        Returns:
            The API module instance.

        Raises:
            AttributeError: Unknown attribute name, or the API module could
                not be imported.
        """
        # Private names are never API modules. Rejecting them first also
        # keeps the lookups of self._api_cache below from recursing when the
        # instance is only partially initialized.
        if name.startswith("_") or name not in self._API_MAP:
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

        if name not in self._api_cache:
            module_path, class_name = self._API_MAP[name]
            try:
                module = importlib.import_module(module_path)
                api_class: type = getattr(module, class_name)
                self._api_cache[name] = api_class(
                    self._config,
                    self._http,
                )
            except ImportError as e:
                raise AttributeError(
                    f"无法加载 API 模块 '{name}': {e}"
                ) from e

        return self._api_cache[name]

    def close(self) -> None:
        """Close the client and release its resources."""
        # Only close an HTTP client this instance created itself.
        if self._owns_http:
            self._http.close()
        # Drop the cached API module instances.
        self._api_cache.clear()

    def __enter__(self) -> XParseClient:
        return self

    def __exit__(self, *args) -> None:
        self.close()

    @property
    def config(self) -> SDKConfiguration:
        """Return the SDK configuration."""
        return self._config
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
__all__ = ["XParseClient"]
|
xparse_client/_config.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""SDK 配置管理
|
|
2
|
+
|
|
3
|
+
提供 SDK 全局配置类,管理认证信息、服务器地址、超时设置等。
|
|
4
|
+
|
|
5
|
+
Example:
|
|
6
|
+
>>> config = SDKConfiguration(
|
|
7
|
+
... app_id="your-app-id",
|
|
8
|
+
... secret_code="your-secret-code",
|
|
9
|
+
... server_url="https://api.textin.com"
|
|
10
|
+
... )
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
|
|
18
|
+
from .exceptions import ConfigurationError
|
|
19
|
+
|
|
20
|
+
# Default configuration values
|
|
21
|
+
DEFAULT_SERVER_URL = "https://api.textin.com"
|
|
22
|
+
DEFAULT_TIMEOUT = 630.0 # seconds
|
|
23
|
+
DEFAULT_MAX_RETRIES = 3
|
|
24
|
+
DEFAULT_BACKOFF_BASE = 2.0
|
|
25
|
+
DEFAULT_BACKOFF_MAX = 60.0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class SDKConfiguration:
    """SDK configuration.

    Holds the global SDK settings: credentials, server address, timeout and
    retry/backoff parameters. Values are validated in ``__post_init__``.

    Attributes:
        app_id: TextIn application ID.
        secret_code: TextIn secret code.
        server_url: Server address (trailing slashes are stripped).
        timeout: Request timeout in seconds.
        max_retries: Maximum number of retries.
        backoff_base: Base of the backoff strategy.
        backoff_max: Maximum backoff wait time in seconds.
        extra_headers: Additional request headers.

    Example:
        >>> # Option 1: pass the credentials directly
        >>> config = SDKConfiguration(
        ...     app_id="your-app-id",
        ...     secret_code="your-secret-code"
        ... )
        >>>
        >>> # Option 2: read them from environment variables
        >>> config = SDKConfiguration.from_env()
    """

    app_id: str
    secret_code: str
    server_url: str = DEFAULT_SERVER_URL
    timeout: float = DEFAULT_TIMEOUT
    max_retries: int = DEFAULT_MAX_RETRIES
    backoff_base: float = DEFAULT_BACKOFF_BASE
    backoff_max: float = DEFAULT_BACKOFF_MAX
    extra_headers: dict[str, str] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate the configuration.

        Raises:
            ConfigurationError: ``app_id`` or ``secret_code`` is empty,
                ``timeout`` is not positive, or ``max_retries`` is negative.
        """
        if not self.app_id:
            raise ConfigurationError(
                "app_id 不能为空",
                details={"param": "app_id"},
            )
        if not self.secret_code:
            raise ConfigurationError(
                "secret_code 不能为空",
                details={"param": "secret_code"},
            )

        # Strip trailing slashes from server_url.
        self.server_url = self.server_url.rstrip("/")

        # Validate the numeric parameters.
        if self.timeout <= 0:
            raise ConfigurationError(
                "timeout 必须大于 0",
                details={"param": "timeout", "value": self.timeout},
            )
        if self.max_retries < 0:
            raise ConfigurationError(
                "max_retries 不能为负数",
                details={"param": "max_retries", "value": self.max_retries},
            )

    @classmethod
    def from_env(
        cls,
        *,
        app_id_env: str = "TEXTIN_APP_ID",
        secret_code_env: str = "TEXTIN_SECRET_CODE",
        server_url_env: str = "TEXTIN_SERVER_URL",
        **kwargs,
    ) -> SDKConfiguration:
        """Create a configuration from environment variables.

        Args:
            app_id_env: Name of the environment variable holding app_id.
            secret_code_env: Name of the environment variable holding
                secret_code.
            server_url_env: Name of the environment variable holding
                server_url.
            **kwargs: Other configuration parameters. Non-empty ``app_id``,
                ``secret_code`` and ``server_url`` values given here take
                precedence over the environment variables.

        Returns:
            An SDKConfiguration instance.

        Raises:
            ConfigurationError: A required environment variable is not set.

        Example:
            >>> # Set the environment variables
            >>> # export TEXTIN_APP_ID=your-app-id
            >>> # export TEXTIN_SECRET_CODE=your-secret-code
            >>> config = SDKConfiguration.from_env()
        """
        app_id = kwargs.pop("app_id", None) or os.environ.get(app_id_env)
        secret_code = kwargs.pop("secret_code", None) or os.environ.get(secret_code_env)
        # Chain with `or` rather than using the .get() default: a variable
        # that is set but empty must also fall back to the default URL,
        # otherwise the client ends up with an empty base URL.
        server_url = (
            kwargs.pop("server_url", None)
            or os.environ.get(server_url_env)
            or DEFAULT_SERVER_URL
        )

        if not app_id:
            raise ConfigurationError(
                f"app_id 未设置,请设置环境变量 {app_id_env} 或传入 app_id 参数",
                details={"env_var": app_id_env},
            )
        if not secret_code:
            raise ConfigurationError(
                f"secret_code 未设置,请设置环境变量 {secret_code_env} 或传入 secret_code 参数",
                details={"env_var": secret_code_env},
            )

        return cls(
            app_id=app_id,
            secret_code=secret_code,
            server_url=server_url,
            **kwargs,
        )

    def get_auth_headers(self) -> dict[str, str]:
        """Build the authentication request headers.

        Returns:
            A dict with the ``x-ti-app-id`` and ``x-ti-secret-code`` headers
            plus ``extra_headers``.
        """
        headers = {
            "x-ti-app-id": self.app_id,
            "x-ti-secret-code": self.secret_code,
        }
        # Merged last, so an extra header with the same key wins.
        headers.update(self.extra_headers)
        return headers

    def get_base_url(self) -> str:
        """Return the API base URL.

        Returns:
            The configured ``server_url``.
        """
        return self.server_url
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class RetryConfiguration:
    """Retry configuration.

    Controls how requests are retried.

    Attributes:
        max_retries: Maximum number of retries.
        backoff_base: Base of the backoff strategy (seconds).
        backoff_max: Maximum backoff time (seconds).
        retry_status_codes: HTTP status codes that should be retried.
    """

    max_retries: int = DEFAULT_MAX_RETRIES
    backoff_base: float = DEFAULT_BACKOFF_BASE
    backoff_max: float = DEFAULT_BACKOFF_MAX
    retry_status_codes: tuple[int, ...] = (429, 500, 502, 503, 504)

    def calculate_backoff(self, attempt: int) -> float:
        """Compute the backoff delay.

        Uses exponential backoff: min(backoff_max, backoff_base ** attempt)

        Args:
            attempt: Current retry count (starting at 0).

        Returns:
            Backoff wait time in seconds, never more than ``backoff_max``.
        """
        try:
            delay = self.backoff_base ** attempt
        except OverflowError:
            # A float power too large to represent raises instead of
            # returning infinity; the clamped result is backoff_max anyway.
            return self.backoff_max
        return min(self.backoff_max, delay)

    def should_retry(self, status_code: int, attempt: int) -> bool:
        """Decide whether a request should be retried.

        Args:
            status_code: HTTP status code.
            attempt: Current retry count (starting at 0).

        Returns:
            True when ``attempt`` is below ``max_retries`` and the status
            code is one of ``retry_status_codes``.
        """
        if attempt >= self.max_retries:
            return False
        return status_code in self.retry_status_codes
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
__all__ = [
|
|
216
|
+
"SDKConfiguration",
|
|
217
|
+
"RetryConfiguration",
|
|
218
|
+
"DEFAULT_SERVER_URL",
|
|
219
|
+
"DEFAULT_TIMEOUT",
|
|
220
|
+
"DEFAULT_MAX_RETRIES",
|
|
221
|
+
]
|