sycommon-python-lib 0.1.56__py3-none-any.whl → 0.1.56b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sycommon/config/Config.py +3 -24
- sycommon/llm/embedding.py +23 -78
- sycommon/llm/get_llm.py +164 -24
- sycommon/logging/kafka_log.py +433 -187
- sycommon/middleware/exception.py +16 -10
- sycommon/middleware/timeout.py +1 -2
- sycommon/middleware/traceid.py +76 -81
- sycommon/rabbitmq/rabbitmq_client.py +242 -232
- sycommon/rabbitmq/rabbitmq_pool.py +218 -278
- sycommon/rabbitmq/rabbitmq_service.py +843 -25
- sycommon/services.py +96 -122
- sycommon/synacos/nacos_service.py +779 -63
- sycommon/tools/merge_headers.py +0 -20
- sycommon/tools/snowflake.py +153 -101
- {sycommon_python_lib-0.1.56.dist-info → sycommon_python_lib-0.1.56b2.dist-info}/METADATA +8 -10
- {sycommon_python_lib-0.1.56.dist-info → sycommon_python_lib-0.1.56b2.dist-info}/RECORD +19 -40
- sycommon/config/LangfuseConfig.py +0 -15
- sycommon/config/SentryConfig.py +0 -13
- sycommon/llm/llm_tokens.py +0 -119
- sycommon/llm/struct_token.py +0 -192
- sycommon/llm/sy_langfuse.py +0 -103
- sycommon/llm/usage_token.py +0 -117
- sycommon/notice/__init__.py +0 -0
- sycommon/notice/uvicorn_monitor.py +0 -200
- sycommon/rabbitmq/rabbitmq_service_client_manager.py +0 -206
- sycommon/rabbitmq/rabbitmq_service_connection_monitor.py +0 -73
- sycommon/rabbitmq/rabbitmq_service_consumer_manager.py +0 -285
- sycommon/rabbitmq/rabbitmq_service_core.py +0 -117
- sycommon/rabbitmq/rabbitmq_service_producer_manager.py +0 -238
- sycommon/sentry/__init__.py +0 -0
- sycommon/sentry/sy_sentry.py +0 -35
- sycommon/synacos/nacos_client_base.py +0 -119
- sycommon/synacos/nacos_config_manager.py +0 -107
- sycommon/synacos/nacos_heartbeat_manager.py +0 -144
- sycommon/synacos/nacos_service_discovery.py +0 -157
- sycommon/synacos/nacos_service_registration.py +0 -270
- sycommon/tools/env.py +0 -62
- {sycommon_python_lib-0.1.56.dist-info → sycommon_python_lib-0.1.56b2.dist-info}/WHEEL +0 -0
- {sycommon_python_lib-0.1.56.dist-info → sycommon_python_lib-0.1.56b2.dist-info}/entry_points.txt +0 -0
- {sycommon_python_lib-0.1.56.dist-info → sycommon_python_lib-0.1.56b2.dist-info}/top_level.txt +0 -0
sycommon/logging/kafka_log.py
CHANGED
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import pprint
|
|
2
3
|
import sys
|
|
3
|
-
import json
|
|
4
|
-
import socket
|
|
5
|
-
import threading
|
|
6
4
|
import traceback
|
|
7
5
|
import asyncio
|
|
6
|
+
import atexit
|
|
8
7
|
from datetime import datetime
|
|
9
|
-
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
import socket
|
|
11
|
+
import time
|
|
12
|
+
import threading
|
|
13
|
+
from queue import Queue, Full, Empty
|
|
10
14
|
from kafka import KafkaProducer
|
|
11
15
|
from loguru import logger
|
|
12
|
-
|
|
16
|
+
import loguru
|
|
13
17
|
from sycommon.config.Config import Config, SingletonMeta
|
|
14
18
|
from sycommon.middleware.context import current_trace_id, current_headers
|
|
15
|
-
from sycommon.tools.env import check_env_flag
|
|
16
19
|
from sycommon.tools.snowflake import Snowflake
|
|
17
20
|
|
|
18
21
|
# 配置Loguru的颜色方案
|
|
@@ -24,201 +27,418 @@ LOGURU_FORMAT = (
|
|
|
24
27
|
)
|
|
25
28
|
|
|
26
29
|
|
|
27
|
-
class
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
30
|
+
class KafkaLogger(metaclass=SingletonMeta):
|
|
31
|
+
_producer = None
|
|
32
|
+
_topic = None
|
|
33
|
+
_service_id = None
|
|
34
|
+
_log_queue = Queue(maxsize=10000)
|
|
35
|
+
_stop_event = threading.Event()
|
|
36
|
+
_sender_thread = None
|
|
37
|
+
_log_pattern = re.compile(
|
|
38
|
+
r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)\s*\|\s*(\w+)\s*\|\s*(\S+):(\S+):(\d+)\s*-\s*(\{.*\})\s*$'
|
|
39
|
+
)
|
|
40
|
+
_queue_warning_threshold = 9000
|
|
41
|
+
_queue_warning_interval = 60 # 秒
|
|
42
|
+
_last_queue_warning = 0
|
|
43
|
+
_shutdown_timeout = 15 # 关闭超时时间,秒
|
|
44
|
+
_config = None # 配置变量存储
|
|
45
|
+
|
|
46
|
+
@staticmethod
|
|
47
|
+
def setup_logger(config: dict):
|
|
48
|
+
# 保存配置到类变量
|
|
49
|
+
KafkaLogger._config = config
|
|
31
50
|
|
|
32
|
-
def __init__(self, service_id: str):
|
|
33
|
-
self.service_id = service_id
|
|
34
|
-
# 获取配置
|
|
35
51
|
from sycommon.synacos.nacos_service import NacosService
|
|
36
|
-
|
|
37
|
-
|
|
52
|
+
KafkaLogger._topic = "shengye-json-log"
|
|
53
|
+
KafkaLogger._service_id = NacosService(config).service_name
|
|
54
|
+
|
|
55
|
+
# 获取 common 配置
|
|
56
|
+
common = NacosService(config).share_configs.get("common.yml", {})
|
|
38
57
|
bootstrap_servers = common.get("log", {}).get(
|
|
39
58
|
"kafka", {}).get("servers", None)
|
|
40
59
|
|
|
41
|
-
|
|
60
|
+
# 创建生产者,优化配置参数
|
|
61
|
+
KafkaLogger._producer = KafkaProducer(
|
|
42
62
|
bootstrap_servers=bootstrap_servers,
|
|
43
63
|
value_serializer=lambda v: json.dumps(
|
|
44
64
|
v, ensure_ascii=False).encode('utf-8'),
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
65
|
+
max_block_ms=60000, # 增加最大阻塞时间从30秒到60秒
|
|
66
|
+
retries=10, # 增加重试次数从5次到10次
|
|
67
|
+
request_timeout_ms=30000, # 增加请求超时时间从10秒到30秒
|
|
68
|
+
compression_type='gzip', # 添加压缩以减少网络传输量
|
|
69
|
+
batch_size=16384, # 增大批处理大小
|
|
70
|
+
linger_ms=5, # 添加短暂延迟以允许更多消息批处理
|
|
71
|
+
buffer_memory=67108864, # 增大缓冲区内存
|
|
72
|
+
connections_max_idle_ms=540000, # 连接最大空闲时间
|
|
73
|
+
reconnect_backoff_max_ms=10000, # 增加重连退避最大时间
|
|
74
|
+
max_in_flight_requests_per_connection=1, # 限制单个连接上未确认的请求数量
|
|
75
|
+
# enable_idempotence=True, # 开启幂等性
|
|
53
76
|
)
|
|
54
77
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# 1. 获取原始日志记录
|
|
62
|
-
record = message.record
|
|
63
|
-
|
|
64
|
-
# 2. 提取 TraceID
|
|
65
|
-
trace_id = None
|
|
66
|
-
try:
|
|
67
|
-
# 如果业务方传的是 JSON 字符串作为 message
|
|
68
|
-
msg_obj = json.loads(record["message"])
|
|
69
|
-
if isinstance(msg_obj, dict):
|
|
70
|
-
trace_id = msg_obj.get("trace_id")
|
|
71
|
-
except:
|
|
72
|
-
pass
|
|
73
|
-
|
|
74
|
-
if not trace_id:
|
|
75
|
-
trace_id = current_trace_id.get()
|
|
76
|
-
|
|
77
|
-
if not trace_id:
|
|
78
|
-
trace_id = str(Snowflake.id)
|
|
79
|
-
else:
|
|
80
|
-
trace_id = str(trace_id)
|
|
81
|
-
|
|
82
|
-
# 3. 提取异常详情 (如果有)
|
|
83
|
-
error_detail = ""
|
|
84
|
-
if record["exception"] is not None:
|
|
85
|
-
# Loguru 的 exception 对象
|
|
86
|
-
error_detail = "".join(traceback.format_exception(
|
|
87
|
-
record["exception"].type,
|
|
88
|
-
record["exception"].value,
|
|
89
|
-
record["exception"].traceback
|
|
90
|
-
))
|
|
91
|
-
elif "error" in record["extra"]:
|
|
92
|
-
# 兼容其他方式注入的异常
|
|
93
|
-
error_detail = str(record["extra"].get("error"))
|
|
94
|
-
|
|
95
|
-
# 4. 获取主机信息
|
|
96
|
-
try:
|
|
97
|
-
ip = socket.gethostbyname(socket.gethostname())
|
|
98
|
-
except:
|
|
99
|
-
ip = '127.0.0.1'
|
|
100
|
-
host_name = socket.gethostname()
|
|
101
|
-
|
|
102
|
-
# 5. 获取线程/协程信息
|
|
103
|
-
try:
|
|
104
|
-
task = asyncio.current_task()
|
|
105
|
-
thread_info = f"coroutine:{task.get_name()}" if task else f"thread:{threading.current_thread().name}"
|
|
106
|
-
except RuntimeError:
|
|
107
|
-
thread_info = f"thread:{threading.current_thread().name}"
|
|
108
|
-
|
|
109
|
-
# 6. 提取类名/文件名信息
|
|
110
|
-
file_name = record["file"].name
|
|
111
|
-
logger_name = record["name"]
|
|
112
|
-
if logger_name and logger_name != file_name:
|
|
113
|
-
class_name = f"{file_name}:{logger_name}"
|
|
114
|
-
else:
|
|
115
|
-
class_name = file_name
|
|
116
|
-
|
|
117
|
-
# 7. 构建最终的 Kafka 日志结构
|
|
118
|
-
log_entry = {
|
|
119
|
-
"traceId": trace_id,
|
|
120
|
-
"sySpanId": "",
|
|
121
|
-
"syBizId": "",
|
|
122
|
-
"ptxId": "",
|
|
123
|
-
"time": record["time"].strftime("%Y-%m-%d %H:%M:%S"),
|
|
124
|
-
"day": datetime.now().strftime("%Y.%m.%d"),
|
|
125
|
-
"msg": record["message"],
|
|
126
|
-
"detail": error_detail,
|
|
127
|
-
"ip": ip,
|
|
128
|
-
"hostName": host_name,
|
|
129
|
-
"tenantId": "",
|
|
130
|
-
"userId": "",
|
|
131
|
-
"customerId": "",
|
|
132
|
-
"env": Config().config.get('Nacos', {}).get('namespaceId', ''),
|
|
133
|
-
"priReqSource": "",
|
|
134
|
-
"reqSource": "",
|
|
135
|
-
"serviceId": self.service_id,
|
|
136
|
-
"logLevel": record["level"].name,
|
|
137
|
-
"className": class_name,
|
|
138
|
-
"method": record["function"],
|
|
139
|
-
"line": str(record["line"]),
|
|
140
|
-
"theadName": thread_info,
|
|
141
|
-
"sqlCost": 0,
|
|
142
|
-
"size": len(str(record["message"])),
|
|
143
|
-
"uid": int(Snowflake.id)
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
# 8. 发送
|
|
147
|
-
self._producer.send("shengye-json-log", log_entry)
|
|
148
|
-
|
|
149
|
-
except Exception as e:
|
|
150
|
-
print(f"KafkaSink Error: {e}")
|
|
151
|
-
|
|
152
|
-
def flush(self):
|
|
153
|
-
if self._producer:
|
|
154
|
-
self._producer.flush(timeout=5)
|
|
78
|
+
# 启动后台发送线程
|
|
79
|
+
KafkaLogger._sender_thread = threading.Thread(
|
|
80
|
+
target=KafkaLogger._send_logs,
|
|
81
|
+
daemon=True
|
|
82
|
+
)
|
|
83
|
+
KafkaLogger._sender_thread.start()
|
|
155
84
|
|
|
85
|
+
# 注册退出处理
|
|
86
|
+
atexit.register(KafkaLogger.close)
|
|
156
87
|
|
|
157
|
-
|
|
158
|
-
|
|
88
|
+
# 设置全局异常处理器
|
|
89
|
+
sys.excepthook = KafkaLogger._handle_exception
|
|
159
90
|
|
|
160
|
-
|
|
161
|
-
|
|
91
|
+
def custom_log_handler(record):
|
|
92
|
+
# 检查record是否是Message对象
|
|
93
|
+
if isinstance(record, loguru._handler.Message):
|
|
94
|
+
# 从Message对象中获取原始日志记录
|
|
95
|
+
record = record.record
|
|
96
|
+
|
|
97
|
+
# 提取基本信息
|
|
98
|
+
message = record["message"]
|
|
99
|
+
level = record["level"].name
|
|
100
|
+
time_str = record["time"].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
|
101
|
+
|
|
102
|
+
# 提取文件、函数和行号信息
|
|
103
|
+
file_info = record["file"].name
|
|
104
|
+
function_info = record["function"]
|
|
105
|
+
line_info = record["line"]
|
|
106
|
+
|
|
107
|
+
# 尝试从message中提取trace_id
|
|
108
|
+
trace_id = None
|
|
109
|
+
try:
|
|
110
|
+
if isinstance(message, str):
|
|
111
|
+
msg_dict = json.loads(message)
|
|
112
|
+
trace_id = msg_dict.get("trace_id")
|
|
113
|
+
except json.JSONDecodeError:
|
|
114
|
+
trace_id = None
|
|
115
|
+
|
|
116
|
+
if not trace_id:
|
|
117
|
+
trace_id = SYLogger.get_trace_id() or Snowflake.id
|
|
118
|
+
|
|
119
|
+
# 获取线程/协程信息
|
|
120
|
+
thread_info = SYLogger._get_execution_context()
|
|
121
|
+
|
|
122
|
+
# 获取主机信息
|
|
123
|
+
try:
|
|
124
|
+
ip = socket.gethostbyname(socket.gethostname())
|
|
125
|
+
except socket.gaierror:
|
|
126
|
+
ip = '127.0.0.1'
|
|
127
|
+
host_name = socket.gethostname()
|
|
128
|
+
|
|
129
|
+
# 检查是否有错误信息并设置detail字段
|
|
130
|
+
error_detail = ""
|
|
131
|
+
if level == "ERROR" and record["exception"] is not None:
|
|
132
|
+
error_detail = "".join(traceback.format_exception(
|
|
133
|
+
record["exception"].type,
|
|
134
|
+
record["exception"].value,
|
|
135
|
+
record["exception"].traceback
|
|
136
|
+
))
|
|
137
|
+
|
|
138
|
+
# 获取logger名称作为类名
|
|
139
|
+
class_name = record["name"]
|
|
140
|
+
|
|
141
|
+
# 合并文件名和类名信息
|
|
142
|
+
if file_info and class_name:
|
|
143
|
+
full_class_name = f"{file_info}:{class_name}"
|
|
144
|
+
elif file_info:
|
|
145
|
+
full_class_name = file_info
|
|
146
|
+
else:
|
|
147
|
+
full_class_name = class_name
|
|
148
|
+
|
|
149
|
+
# 构建日志条目
|
|
150
|
+
log_entry = {
|
|
151
|
+
"traceId": trace_id,
|
|
152
|
+
"sySpanId": "",
|
|
153
|
+
"syBizId": "",
|
|
154
|
+
"ptxId": "",
|
|
155
|
+
"time": time_str,
|
|
156
|
+
"day": datetime.now().strftime("%Y.%m.%d"),
|
|
157
|
+
"msg": message,
|
|
158
|
+
"detail": error_detail,
|
|
159
|
+
"ip": ip,
|
|
160
|
+
"hostName": host_name,
|
|
161
|
+
"tenantId": "",
|
|
162
|
+
"userId": "",
|
|
163
|
+
"customerId": "",
|
|
164
|
+
"env": Config().config['Nacos']['namespaceId'],
|
|
165
|
+
"priReqSource": "",
|
|
166
|
+
"reqSource": "",
|
|
167
|
+
"serviceId": KafkaLogger._service_id,
|
|
168
|
+
"logLevel": level,
|
|
169
|
+
"classShortName": "",
|
|
170
|
+
"method": "",
|
|
171
|
+
"line": "",
|
|
172
|
+
"theadName": thread_info,
|
|
173
|
+
"className": "",
|
|
174
|
+
"sqlCost": 0,
|
|
175
|
+
"size": len(str(message)),
|
|
176
|
+
"uid": int(Snowflake.id) # 独立新的id
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
# 智能队列管理
|
|
180
|
+
if not KafkaLogger._safe_put_to_queue(log_entry):
|
|
181
|
+
logger.warning(json.dumps({
|
|
182
|
+
"trace_id": trace_id,
|
|
183
|
+
"message": "Log queue is full, log discarded",
|
|
184
|
+
"level": "WARNING"
|
|
185
|
+
}, ensure_ascii=False))
|
|
186
|
+
|
|
187
|
+
# 配置日志处理器
|
|
162
188
|
logger.remove()
|
|
163
189
|
|
|
164
|
-
|
|
165
|
-
service_id = NacosService(config).service_name
|
|
166
|
-
|
|
167
|
-
KafkaLogger._sink_instance = KafkaSink(service_id)
|
|
168
|
-
|
|
190
|
+
# 添加Kafka日志处理器
|
|
169
191
|
logger.add(
|
|
170
|
-
|
|
192
|
+
custom_log_handler,
|
|
171
193
|
level="INFO",
|
|
172
|
-
|
|
173
|
-
enqueue=True,
|
|
174
|
-
backtrace=True,
|
|
175
|
-
diagnose=True
|
|
194
|
+
enqueue=True # 使用Loguru的队列功能
|
|
176
195
|
)
|
|
177
196
|
|
|
197
|
+
# 添加控制台错误日志处理器
|
|
178
198
|
logger.add(
|
|
179
199
|
sink=sys.stdout,
|
|
180
200
|
level="ERROR",
|
|
181
201
|
format=LOGURU_FORMAT,
|
|
182
|
-
colorize=True,
|
|
183
|
-
|
|
184
|
-
diagnose=True
|
|
202
|
+
colorize=True, # 启用颜色
|
|
203
|
+
filter=lambda record: record["level"].name == "ERROR"
|
|
185
204
|
)
|
|
186
205
|
|
|
187
|
-
sys.excepthook = KafkaLogger._handle_exception
|
|
188
|
-
|
|
189
206
|
@staticmethod
|
|
190
207
|
def _handle_exception(exc_type, exc_value, exc_traceback):
|
|
208
|
+
"""全局异常处理器"""
|
|
209
|
+
# 跳过键盘中断(Ctrl+C)
|
|
191
210
|
if issubclass(exc_type, KeyboardInterrupt):
|
|
192
211
|
sys.__excepthook__(exc_type, exc_value, exc_traceback)
|
|
193
212
|
return
|
|
194
213
|
|
|
195
|
-
trace_id
|
|
196
|
-
|
|
214
|
+
# 获取当前的trace_id
|
|
215
|
+
trace_id = SYLogger.get_trace_id() or Snowflake.id
|
|
216
|
+
|
|
217
|
+
# 构建错误日志
|
|
218
|
+
error_log = {
|
|
197
219
|
"trace_id": trace_id,
|
|
198
|
-
"message": f"Uncaught exception: {exc_type.__name__}",
|
|
199
|
-
"level": "ERROR"
|
|
200
|
-
|
|
220
|
+
"message": f"Uncaught exception: {exc_type.__name__}: {str(exc_value)}",
|
|
221
|
+
"level": "ERROR",
|
|
222
|
+
"detail": "".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
|
|
223
|
+
}
|
|
201
224
|
|
|
202
|
-
|
|
203
|
-
|
|
225
|
+
# 使用Loguru记录错误,确保包含完整堆栈跟踪
|
|
226
|
+
logger.opt(exception=(exc_type, exc_value, exc_traceback)).error(
|
|
227
|
+
json.dumps(error_log, ensure_ascii=False)
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
@staticmethod
|
|
231
|
+
def _safe_put_to_queue(log_entry):
|
|
232
|
+
"""安全放入队列,提供更健壮的队列管理"""
|
|
233
|
+
try:
|
|
234
|
+
# 检查队列水位并发出警告
|
|
235
|
+
current_time = time.time()
|
|
236
|
+
qsize = KafkaLogger._log_queue.qsize()
|
|
237
|
+
|
|
238
|
+
if qsize > KafkaLogger._queue_warning_threshold:
|
|
239
|
+
if current_time - KafkaLogger._last_queue_warning > KafkaLogger._queue_warning_interval:
|
|
240
|
+
warning_msg = f"Log queue at {qsize}/{KafkaLogger._log_queue.maxsize} capacity"
|
|
241
|
+
print(warning_msg)
|
|
242
|
+
logger.warning(json.dumps({
|
|
243
|
+
"trace_id": log_entry.get("traceId"),
|
|
244
|
+
"message": warning_msg,
|
|
245
|
+
"level": "WARNING"
|
|
246
|
+
}, ensure_ascii=False))
|
|
247
|
+
KafkaLogger._last_queue_warning = current_time
|
|
248
|
+
|
|
249
|
+
# 尝试快速放入
|
|
250
|
+
KafkaLogger._log_queue.put(log_entry, block=False)
|
|
251
|
+
return True
|
|
252
|
+
except Full:
|
|
253
|
+
# 队列已满时的处理策略
|
|
254
|
+
if KafkaLogger._stop_event.is_set():
|
|
255
|
+
# 关闭过程中直接丢弃日志
|
|
256
|
+
return False
|
|
257
|
+
|
|
258
|
+
# 尝试移除最旧的日志并添加新日志
|
|
259
|
+
try:
|
|
260
|
+
with threading.Lock(): # 添加锁确保操作原子性
|
|
261
|
+
if not KafkaLogger._log_queue.empty():
|
|
262
|
+
KafkaLogger._log_queue.get_nowait()
|
|
263
|
+
KafkaLogger._log_queue.put_nowait(log_entry)
|
|
264
|
+
return True
|
|
265
|
+
except Exception:
|
|
266
|
+
return False
|
|
267
|
+
|
|
268
|
+
@staticmethod
|
|
269
|
+
def _send_logs():
|
|
270
|
+
"""后台线程:批量发送日志到Kafka,优化内存使用"""
|
|
271
|
+
batch = []
|
|
272
|
+
last_flush = time.time()
|
|
273
|
+
batch_size = 100
|
|
274
|
+
flush_interval = 1 # 秒
|
|
275
|
+
consecutive_errors = 0
|
|
276
|
+
max_consecutive_errors = 10 # 最大连续错误数,超过则降低处理速度
|
|
277
|
+
last_reconnect_attempt = 0
|
|
278
|
+
reconnect_interval = 30 # 重新连接尝试间隔,秒
|
|
279
|
+
|
|
280
|
+
while not KafkaLogger._stop_event.is_set():
|
|
281
|
+
try:
|
|
282
|
+
# 检查生产者状态,如果长时间失败,尝试重新创建生产者
|
|
283
|
+
current_time = time.time()
|
|
284
|
+
if consecutive_errors > max_consecutive_errors and current_time - last_reconnect_attempt > reconnect_interval:
|
|
285
|
+
logger.warning(json.dumps({
|
|
286
|
+
"trace_id": "system",
|
|
287
|
+
"message": "尝试重新创建Kafka生产者以解决连接问题",
|
|
288
|
+
"level": "WARNING"
|
|
289
|
+
}, ensure_ascii=False))
|
|
290
|
+
last_reconnect_attempt = current_time
|
|
291
|
+
|
|
292
|
+
# 尝试重新创建生产者
|
|
293
|
+
try:
|
|
294
|
+
# 使用类变量中存储的配置
|
|
295
|
+
from sycommon.synacos.nacos_service import NacosService
|
|
296
|
+
common = NacosService(
|
|
297
|
+
KafkaLogger._config).share_configs.get("common.yml", {})
|
|
298
|
+
bootstrap_servers = common.get("log", {}).get(
|
|
299
|
+
"kafka", {}).get("servers", None)
|
|
300
|
+
|
|
301
|
+
# 关闭旧生产者
|
|
302
|
+
if KafkaLogger._producer:
|
|
303
|
+
KafkaLogger._producer.close(timeout=5)
|
|
304
|
+
|
|
305
|
+
# 创建新生产者
|
|
306
|
+
KafkaLogger._producer = KafkaProducer(
|
|
307
|
+
bootstrap_servers=bootstrap_servers,
|
|
308
|
+
value_serializer=lambda v: json.dumps(
|
|
309
|
+
v, ensure_ascii=False).encode('utf-8'),
|
|
310
|
+
max_block_ms=60000,
|
|
311
|
+
retries=10,
|
|
312
|
+
request_timeout_ms=30000,
|
|
313
|
+
compression_type='gzip',
|
|
314
|
+
batch_size=16384,
|
|
315
|
+
linger_ms=5,
|
|
316
|
+
buffer_memory=67108864,
|
|
317
|
+
connections_max_idle_ms=540000,
|
|
318
|
+
reconnect_backoff_max_ms=10000,
|
|
319
|
+
)
|
|
320
|
+
consecutive_errors = 0
|
|
321
|
+
logger.info(json.dumps({
|
|
322
|
+
"trace_id": "system",
|
|
323
|
+
"message": "Kafka生产者已重新创建",
|
|
324
|
+
"level": "INFO"
|
|
325
|
+
}, ensure_ascii=False))
|
|
326
|
+
except Exception as e:
|
|
327
|
+
logger.error(json.dumps({
|
|
328
|
+
"trace_id": "system",
|
|
329
|
+
"message": f"重新创建Kafka生产者失败: {str(e)}",
|
|
330
|
+
"level": "ERROR"
|
|
331
|
+
}, ensure_ascii=False))
|
|
332
|
+
|
|
333
|
+
# 批量获取日志
|
|
334
|
+
while len(batch) < batch_size and not KafkaLogger._stop_event.is_set():
|
|
335
|
+
try:
|
|
336
|
+
# 使用超时获取,避免长时间阻塞
|
|
337
|
+
log_entry = KafkaLogger._log_queue.get(timeout=0.5)
|
|
338
|
+
batch.append(log_entry)
|
|
339
|
+
except Empty:
|
|
340
|
+
break
|
|
341
|
+
|
|
342
|
+
# 定时或定量发送
|
|
343
|
+
current_time = time.time()
|
|
344
|
+
if batch and (len(batch) >= batch_size or (current_time - last_flush > flush_interval)):
|
|
345
|
+
try:
|
|
346
|
+
# 分批发送,避免一次发送过大
|
|
347
|
+
sub_batch_size = min(50, batch_size)
|
|
348
|
+
for i in range(0, len(batch), sub_batch_size):
|
|
349
|
+
sub_batch = batch[i:i+sub_batch_size]
|
|
350
|
+
for entry in sub_batch:
|
|
351
|
+
KafkaLogger._producer.send(
|
|
352
|
+
KafkaLogger._topic, entry)
|
|
353
|
+
KafkaLogger._producer.flush(timeout=15)
|
|
354
|
+
|
|
355
|
+
batch = [] # 发送成功后清空批次
|
|
356
|
+
last_flush = current_time
|
|
357
|
+
consecutive_errors = 0 # 重置错误计数
|
|
358
|
+
except Exception as e:
|
|
359
|
+
consecutive_errors += 1
|
|
360
|
+
error_msg = f"Kafka发送失败: {e}"
|
|
361
|
+
print(error_msg)
|
|
362
|
+
logger.error(json.dumps({
|
|
363
|
+
"trace_id": "system",
|
|
364
|
+
"message": error_msg,
|
|
365
|
+
"level": "ERROR"
|
|
366
|
+
}, ensure_ascii=False))
|
|
367
|
+
|
|
368
|
+
# 连续错误过多时增加休眠时间,避免CPU空转
|
|
369
|
+
if consecutive_errors > max_consecutive_errors:
|
|
370
|
+
sleep_time = min(5, consecutive_errors // 2)
|
|
371
|
+
time.sleep(sleep_time)
|
|
372
|
+
|
|
373
|
+
except Exception as e:
|
|
374
|
+
print(f"日志处理线程异常: {e}")
|
|
375
|
+
time.sleep(1) # 短暂休眠恢复
|
|
376
|
+
|
|
377
|
+
# 退出前发送剩余日志
|
|
378
|
+
if batch:
|
|
379
|
+
try:
|
|
380
|
+
for entry in batch:
|
|
381
|
+
KafkaLogger._producer.send(KafkaLogger._topic, entry)
|
|
382
|
+
KafkaLogger._producer.flush(
|
|
383
|
+
timeout=KafkaLogger._shutdown_timeout)
|
|
384
|
+
except Exception as e:
|
|
385
|
+
print(f"关闭时发送剩余日志失败: {e}")
|
|
204
386
|
|
|
205
387
|
@staticmethod
|
|
206
388
|
def close():
|
|
207
|
-
|
|
208
|
-
|
|
389
|
+
"""安全关闭资源,增强可靠性"""
|
|
390
|
+
if KafkaLogger._stop_event.is_set():
|
|
391
|
+
return
|
|
392
|
+
|
|
393
|
+
print("开始关闭Kafka日志系统...")
|
|
394
|
+
KafkaLogger._stop_event.set()
|
|
395
|
+
|
|
396
|
+
# 等待发送线程结束
|
|
397
|
+
if KafkaLogger._sender_thread and KafkaLogger._sender_thread.is_alive():
|
|
398
|
+
print(f"等待日志发送线程结束,超时时间: {KafkaLogger._shutdown_timeout}秒")
|
|
399
|
+
KafkaLogger._sender_thread.join(
|
|
400
|
+
timeout=KafkaLogger._shutdown_timeout)
|
|
401
|
+
|
|
402
|
+
# 如果线程仍在运行,强制终止(虽然daemon线程会自动终止,但这里显式处理)
|
|
403
|
+
if KafkaLogger._sender_thread.is_alive():
|
|
404
|
+
print("日志发送线程未能及时结束,将被强制终止")
|
|
405
|
+
|
|
406
|
+
# 关闭生产者
|
|
407
|
+
if KafkaLogger._producer:
|
|
408
|
+
try:
|
|
409
|
+
print("关闭Kafka生产者...")
|
|
410
|
+
KafkaLogger._producer.close(
|
|
411
|
+
timeout=KafkaLogger._shutdown_timeout)
|
|
412
|
+
print("Kafka生产者已关闭")
|
|
413
|
+
except Exception as e:
|
|
414
|
+
print(f"关闭Kafka生产者失败: {e}")
|
|
415
|
+
|
|
416
|
+
# 清空队列防止内存滞留
|
|
417
|
+
remaining = 0
|
|
418
|
+
while not KafkaLogger._log_queue.empty():
|
|
419
|
+
try:
|
|
420
|
+
KafkaLogger._log_queue.get_nowait()
|
|
421
|
+
remaining += 1
|
|
422
|
+
except Empty:
|
|
423
|
+
break
|
|
424
|
+
|
|
425
|
+
print(f"已清空日志队列,剩余日志数: {remaining}")
|
|
209
426
|
|
|
210
427
|
|
|
211
428
|
class SYLogger:
|
|
212
429
|
@staticmethod
|
|
213
430
|
def get_trace_id():
|
|
431
|
+
"""从上下文中获取当前的 trace_id"""
|
|
214
432
|
return current_trace_id.get()
|
|
215
433
|
|
|
216
434
|
@staticmethod
|
|
217
435
|
def set_trace_id(trace_id: str):
|
|
436
|
+
"""设置当前的 trace_id"""
|
|
218
437
|
return current_trace_id.set(trace_id)
|
|
219
438
|
|
|
220
439
|
@staticmethod
|
|
221
440
|
def reset_trace_id(token):
|
|
441
|
+
"""重置当前的 trace_id"""
|
|
222
442
|
current_trace_id.reset(token)
|
|
223
443
|
|
|
224
444
|
@staticmethod
|
|
@@ -235,59 +455,64 @@ class SYLogger:
|
|
|
235
455
|
|
|
236
456
|
@staticmethod
|
|
237
457
|
def _get_execution_context() -> str:
|
|
458
|
+
"""获取当前执行上下文的线程或协程信息,返回格式化字符串"""
|
|
238
459
|
try:
|
|
460
|
+
# 尝试获取协程信息
|
|
239
461
|
task = asyncio.current_task()
|
|
240
462
|
if task:
|
|
241
|
-
|
|
463
|
+
task_name = task.get_name()
|
|
464
|
+
return f"coroutine:{task_name}"
|
|
242
465
|
except RuntimeError:
|
|
243
|
-
|
|
244
|
-
|
|
466
|
+
# 不在异步上下文中,获取线程信息
|
|
467
|
+
thread = threading.current_thread()
|
|
468
|
+
return f"thread:{thread.name}"
|
|
469
|
+
|
|
470
|
+
return "unknown"
|
|
245
471
|
|
|
246
472
|
@staticmethod
|
|
247
473
|
def _log(msg: any, level: str = "INFO"):
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
修复:手动提取堆栈信息并写入 message,确保 Kafka 能收到
|
|
251
|
-
"""
|
|
252
|
-
# 序列化消息
|
|
474
|
+
trace_id = SYLogger.get_trace_id() or Snowflake.id
|
|
475
|
+
|
|
253
476
|
if isinstance(msg, dict) or isinstance(msg, list):
|
|
254
477
|
msg_str = json.dumps(msg, ensure_ascii=False)
|
|
255
478
|
else:
|
|
256
479
|
msg_str = str(msg)
|
|
257
480
|
|
|
258
|
-
#
|
|
259
|
-
|
|
260
|
-
"trace_id": str(SYLogger.get_trace_id() or Snowflake.id),
|
|
261
|
-
"message": msg_str,
|
|
262
|
-
"level": level,
|
|
263
|
-
"threadName": SYLogger._get_execution_context()
|
|
264
|
-
}
|
|
481
|
+
# 获取执行上下文信息并格式化为字符串
|
|
482
|
+
thread_info = SYLogger._get_execution_context()
|
|
265
483
|
|
|
266
|
-
#
|
|
484
|
+
# 构建日志结构,添加线程/协程信息到threadName字段
|
|
485
|
+
request_log = {}
|
|
267
486
|
if level == "ERROR":
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
487
|
+
request_log = {
|
|
488
|
+
"trace_id": str(trace_id) if trace_id else Snowflake.id,
|
|
489
|
+
"message": msg_str,
|
|
490
|
+
"traceback": traceback.format_exc(),
|
|
491
|
+
"level": level,
|
|
492
|
+
"threadName": thread_info
|
|
493
|
+
}
|
|
494
|
+
else:
|
|
495
|
+
request_log = {
|
|
496
|
+
"trace_id": str(trace_id) if trace_id else Snowflake.id,
|
|
497
|
+
"message": msg_str,
|
|
498
|
+
"level": level,
|
|
499
|
+
"threadName": thread_info
|
|
500
|
+
}
|
|
278
501
|
|
|
502
|
+
# 选择日志级别
|
|
503
|
+
_log = ''
|
|
279
504
|
if level == "ERROR":
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
# 但这没关系,因为 KafkaSink 解析 message 字符串时会重新读取 detail
|
|
283
|
-
logger.opt(exception=True).error(log_json)
|
|
505
|
+
_log = json.dumps(request_log, ensure_ascii=False)
|
|
506
|
+
logger.error(_log)
|
|
284
507
|
elif level == "WARNING":
|
|
285
|
-
|
|
508
|
+
_log = json.dumps(request_log, ensure_ascii=False)
|
|
509
|
+
logger.warning(_log)
|
|
286
510
|
else:
|
|
287
|
-
|
|
511
|
+
_log = json.dumps(request_log, ensure_ascii=False)
|
|
512
|
+
logger.info(_log)
|
|
288
513
|
|
|
289
|
-
if
|
|
290
|
-
|
|
514
|
+
if os.getenv('DEV-LOG', 'false').lower() == 'true':
|
|
515
|
+
pprint.pprint(_log)
|
|
291
516
|
|
|
292
517
|
@staticmethod
|
|
293
518
|
def info(msg: any, *args, **kwargs):
|
|
@@ -307,4 +532,25 @@ class SYLogger:
|
|
|
307
532
|
|
|
308
533
|
@staticmethod
|
|
309
534
|
def exception(msg: any, *args, **kwargs):
|
|
310
|
-
|
|
535
|
+
"""记录异常信息,包括完整堆栈"""
|
|
536
|
+
trace_id = SYLogger.get_trace_id() or Snowflake.id
|
|
537
|
+
|
|
538
|
+
if isinstance(msg, dict) or isinstance(msg, list):
|
|
539
|
+
msg_str = json.dumps(msg, ensure_ascii=False)
|
|
540
|
+
else:
|
|
541
|
+
msg_str = str(msg)
|
|
542
|
+
|
|
543
|
+
# 获取执行上下文信息
|
|
544
|
+
thread_info = SYLogger._get_execution_context()
|
|
545
|
+
|
|
546
|
+
# 构建包含异常堆栈的日志
|
|
547
|
+
request_log = {
|
|
548
|
+
"trace_id": str(trace_id) if trace_id else Snowflake.id,
|
|
549
|
+
"message": msg_str,
|
|
550
|
+
"level": "ERROR",
|
|
551
|
+
"threadName": thread_info
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
# 使用Loguru记录完整异常堆栈
|
|
555
|
+
logger.opt(exception=True).error(
|
|
556
|
+
json.dumps(request_log, ensure_ascii=False))
|