xparse-client 0.2.18__tar.gz → 0.2.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xparse_client-0.2.18 → xparse_client-0.2.19}/PKG-INFO +1 -1
- {xparse_client-0.2.18 → xparse_client-0.2.19}/pyproject.toml +1 -1
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client/pipeline/pipeline.py +64 -9
- {xparse_client-0.2.18 → xparse_client-0.2.19}/LICENSE +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/MANIFEST.in +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/README.md +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/requirements.txt +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/setup.cfg +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client/__init__.py +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client/pipeline/__init__.py +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client/pipeline/config.py +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client/pipeline/destinations.py +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client/pipeline/sources.py +0 -0
- {xparse_client-0.2.18 → xparse_client-0.2.19}/xparse_client.egg-info/SOURCES.txt +0 -0
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
|
+
import re
|
|
6
7
|
import time
|
|
7
8
|
from datetime import datetime, timezone
|
|
8
9
|
from pathlib import Path
|
|
@@ -220,6 +221,67 @@ class Pipeline:
|
|
|
220
221
|
|
|
221
222
|
return config
|
|
222
223
|
|
|
224
|
+
def _extract_error_message(self, response: requests.Response) -> Tuple[str, str]:
|
|
225
|
+
"""
|
|
226
|
+
从响应中提取规范化的错误信息
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
Tuple[str, str]: (error_msg, x_request_id)
|
|
230
|
+
"""
|
|
231
|
+
# 首先尝试从响应头中提取 x-request-id(requests的headers大小写不敏感)
|
|
232
|
+
x_request_id = response.headers.get('x-request-id', '')
|
|
233
|
+
error_msg = ''
|
|
234
|
+
|
|
235
|
+
# 获取Content-Type
|
|
236
|
+
content_type = response.headers.get('Content-Type', '').lower()
|
|
237
|
+
|
|
238
|
+
# 尝试解析JSON响应
|
|
239
|
+
if 'application/json' in content_type:
|
|
240
|
+
try:
|
|
241
|
+
result = response.json()
|
|
242
|
+
# 如果响应头中没有x-request-id,尝试从响应体中获取
|
|
243
|
+
if not x_request_id:
|
|
244
|
+
x_request_id = result.get('x_request_id', '')
|
|
245
|
+
error_msg = result.get('message', result.get('msg', f'HTTP {response.status_code}'))
|
|
246
|
+
return error_msg, x_request_id
|
|
247
|
+
except:
|
|
248
|
+
pass
|
|
249
|
+
|
|
250
|
+
# 处理HTML响应
|
|
251
|
+
if 'text/html' in content_type or response.text.strip().startswith('<'):
|
|
252
|
+
try:
|
|
253
|
+
# 从HTML中提取标题(通常包含状态码和状态文本)
|
|
254
|
+
title_match = re.search(r'<title>(.*?)</title>', response.text, re.IGNORECASE)
|
|
255
|
+
if title_match:
|
|
256
|
+
error_msg = title_match.group(1).strip()
|
|
257
|
+
else:
|
|
258
|
+
# 如果没有title,尝试提取h1标签
|
|
259
|
+
h1_match = re.search(r'<h1>(.*?)</h1>', response.text, re.IGNORECASE)
|
|
260
|
+
if h1_match:
|
|
261
|
+
error_msg = h1_match.group(1).strip()
|
|
262
|
+
else:
|
|
263
|
+
error_msg = f'HTTP {response.status_code}'
|
|
264
|
+
except:
|
|
265
|
+
error_msg = f'HTTP {response.status_code}'
|
|
266
|
+
|
|
267
|
+
# 处理纯文本响应
|
|
268
|
+
elif 'text/plain' in content_type:
|
|
269
|
+
error_msg = response.text[:200].strip() if response.text else f'HTTP {response.status_code}'
|
|
270
|
+
|
|
271
|
+
# 其他情况
|
|
272
|
+
else:
|
|
273
|
+
if response.text:
|
|
274
|
+
# 尝试截取前200字符,但去除换行和多余空格
|
|
275
|
+
text = response.text[:200].strip()
|
|
276
|
+
# 如果包含多行,只取第一行
|
|
277
|
+
if '\n' in text:
|
|
278
|
+
text = text.split('\n')[0].strip()
|
|
279
|
+
error_msg = text if text else f'HTTP {response.status_code}'
|
|
280
|
+
else:
|
|
281
|
+
error_msg = f'HTTP {response.status_code}'
|
|
282
|
+
|
|
283
|
+
return error_msg, x_request_id
|
|
284
|
+
|
|
223
285
|
def _call_pipeline_api(self, file_bytes: bytes, filename: str, data_source: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
224
286
|
url = f"{self.api_base_url}/pipeline"
|
|
225
287
|
max_retries = 3
|
|
@@ -263,15 +325,8 @@ class Pipeline:
|
|
|
263
325
|
logger.error(f"Pipeline 接口返回错误: code={result.get('code')}, message={error_msg}, x_request_id={x_request_id}")
|
|
264
326
|
return None
|
|
265
327
|
else:
|
|
266
|
-
#
|
|
267
|
-
x_request_id =
|
|
268
|
-
error_msg = ''
|
|
269
|
-
try:
|
|
270
|
-
result = response.json()
|
|
271
|
-
x_request_id = result.get('x_request_id', '')
|
|
272
|
-
error_msg = result.get('message', result.get('msg', response.text[:200]))
|
|
273
|
-
except:
|
|
274
|
-
error_msg = response.text[:200] if response.text else f'HTTP {response.status_code}'
|
|
328
|
+
# 使用规范化函数提取错误信息
|
|
329
|
+
error_msg, x_request_id = self._extract_error_message(response)
|
|
275
330
|
|
|
276
331
|
print(f" ✗ API 错误 {response.status_code}: {error_msg}, x_request_id={x_request_id}, 重试 {try_count + 1}/{max_retries}")
|
|
277
332
|
logger.warning(f"API 错误 {response.status_code}: {error_msg}, x_request_id={x_request_id}, 重试 {try_count + 1}/{max_retries}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|