sycommon-python-lib 0.1.33__py3-none-any.whl → 0.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sycommon-python-lib might be problematic. Click here for more details.

@@ -25,6 +25,7 @@ class RabbitMQClient:
25
25
  """
26
26
  RabbitMQ客户端(基于连接池),支持集群多节点配置
27
27
  提供自动故障转移、连接恢复和消息可靠性保障
28
+ 采用细粒度锁设计,彻底避免死锁隐患
28
29
  """
29
30
 
30
31
  def __init__(
@@ -70,18 +71,19 @@ class RabbitMQClient:
70
71
  # 消息处理参数
71
72
  self.consumption_stall_threshold = consumption_stall_threshold
72
73
 
73
- # 通道和资源对象(从池获取)
74
+ # 通道和资源对象(由 _connection_lock 保护)
74
75
  self.channel: Optional[AbstractChannel] = None
75
76
  self.exchange: Optional[AbstractExchange] = None
76
77
  self.queue: Optional[AbstractQueue] = None
77
78
 
78
- # 状态跟踪
79
+ # 状态跟踪(按类型拆分锁保护)
79
80
  self.actual_queue_name: Optional[str] = None
80
- self._exchange_exists = False
81
- self._queue_exists = False
82
- self._queue_bound = False
83
- self._is_consuming = False # 核心消费状态标志
84
- self._closed = False
81
+ self._exchange_exists = False # 由 _connection_lock 保护
82
+ self._queue_exists = False # 由 _connection_lock 保护
83
+ self._queue_bound = False # 由 _connection_lock 保护
84
+ self._is_consuming = False # 由 _consume_state_lock 保护
85
+ self._closed = False # 由 _connection_lock 保护
86
+ # 由 _consume_state_lock 保护
85
87
  self._consumer_tag: Optional[ConsumerTag] = None
86
88
  self._last_activity_timestamp = asyncio.get_event_loop().time()
87
89
  self._last_message_processed = asyncio.get_event_loop().time()
@@ -90,39 +92,156 @@ class RabbitMQClient:
90
92
  self.message_handler: Optional[Callable[
91
93
  [Union[Dict[str, Any], str], AbstractIncomingMessage],
92
94
  Coroutine[Any, Any, None]
93
- ]] = None
95
+ ]] = None # 由 _consume_state_lock 保护
94
96
  self._consuming_task: Optional[asyncio.Task] = None
95
97
  self._reconnect_task: Optional[asyncio.Task] = None
96
98
  self._keepalive_task: Optional[asyncio.Task] = None
97
99
  self._monitor_task: Optional[asyncio.Task] = None
98
100
 
99
- # 消息处理跟踪
101
+ # 消息处理跟踪(由 _tracking_lock 保护)
100
102
  self._tracking_messages: Dict[str, Dict[str, Any]] = {}
101
103
 
104
+ # 细粒度锁(核心设计:按资源类型拆分,避免嵌套)
105
+ # 保护消费状态(_is_consuming、message_handler、_consumer_tag)
106
+ self._consume_state_lock = asyncio.Lock()
107
+ self._tracking_lock = asyncio.Lock() # 保护消息跟踪记录(_tracking_messages)
108
+ # 保护连接/资源状态(channel、exchange、queue、_closed等)
109
+ self._connection_lock = asyncio.Lock()
110
+
102
111
  @property
103
- def is_connected(self) -> bool:
104
- """检查当前通道是否有效"""
105
- return (not self._closed and
106
- self.channel is not None and
107
- not self.channel.is_closed and
108
- self.exchange is not None)
112
+ async def is_connected(self) -> bool:
113
+ """异步属性:检查当前通道是否有效(线程安全)"""
114
+ async with self._connection_lock:
115
+ return (not self._closed and
116
+ self.channel is not None and
117
+ not self.channel.is_closed and
118
+ self.exchange is not None)
109
119
 
110
120
  def _update_activity_timestamp(self) -> None:
111
- """更新最后活动时间戳"""
121
+ """更新最后活动时间戳(非共享状态,无需锁)"""
112
122
  self._last_activity_timestamp = asyncio.get_event_loop().time()
113
123
 
114
124
  def _update_message_processed_timestamp(self) -> None:
115
- """更新最后消息处理时间戳"""
125
+ """更新最后消息处理时间戳(非共享状态,无需锁)"""
116
126
  self._last_message_processed = asyncio.get_event_loop().time()
117
127
 
118
- def _set_is_consuming(self, value: bool) -> None:
119
- """安全更新消费状态并记录日志"""
120
- if self._is_consuming != value:
121
- logger.info(f"消费状态变更: {self._is_consuming} {value}")
122
- self._is_consuming = value
123
-
128
+ # ------------------------------
129
+ # 消费状态操作(_consume_state_lock 专属保护)
130
+ # ------------------------------
131
+ async def _get_consume_state(self) -> tuple[bool, Optional[Callable], Optional[ConsumerTag]]:
132
+ """安全获取消费相关状态(一次性获取,避免多次加锁)"""
133
+ async with self._consume_state_lock:
134
+ return self._is_consuming, self.message_handler, self._consumer_tag
135
+
136
+ async def _set_consume_state(self, is_consuming: bool, consumer_tag: Optional[ConsumerTag] = None):
137
+ """安全更新消费状态(原子操作)"""
138
+ async with self._consume_state_lock:
139
+ old_is_consuming = self._is_consuming
140
+ self._is_consuming = is_consuming
141
+ if consumer_tag is not None:
142
+ self._consumer_tag = consumer_tag
143
+ if old_is_consuming != is_consuming:
144
+ logger.info(f"消费状态变更: {old_is_consuming} → {is_consuming}")
145
+
146
+ async def set_message_handler(self, handler):
147
+ """设置消息处理器(加锁保护,避免并发修改)"""
148
+ async with self._consume_state_lock:
149
+ self.message_handler = handler
150
+ logger.info("消息处理器已设置")
151
+
152
+ # ------------------------------
153
+ # 连接状态操作(_connection_lock 专属保护)
154
+ # ------------------------------
155
+ async def _is_closed(self) -> bool:
156
+ """检查客户端是否已关闭(线程安全)"""
157
+ async with self._connection_lock:
158
+ return self._closed
159
+
160
+ async def _mark_closed(self):
161
+ """标记客户端已关闭(原子操作)"""
162
+ async with self._connection_lock:
163
+ self._closed = True
164
+
165
+ async def _get_connection_resources(self) -> tuple[Optional[AbstractChannel], Optional[AbstractExchange], Optional[AbstractQueue]]:
166
+ """安全获取连接资源(channel/exchange/queue)"""
167
+ async with self._connection_lock:
168
+ return self.channel, self.exchange, self.queue
169
+
170
+ async def _reset_connection_state(self):
171
+ """重置连接状态(用于重连时,原子操作)"""
172
+ async with self._connection_lock:
173
+ self._exchange_exists = False
174
+ self._queue_exists = False
175
+ self._queue_bound = False
176
+ self.channel = None
177
+ self.exchange = None
178
+ self.queue = None
179
+ self.actual_queue_name = None
180
+
181
+ async def _update_connection_resources(self, channel: AbstractChannel, exchange: AbstractExchange, queue: Optional[AbstractQueue] = None):
182
+ """更新连接资源(原子操作)"""
183
+ async with self._connection_lock:
184
+ self.channel = channel
185
+ self.exchange = exchange
186
+ self.queue = queue
187
+ if queue:
188
+ self.actual_queue_name = queue.name
189
+
190
+ # ------------------------------
191
+ # 消息跟踪操作(_tracking_lock 专属保护)
192
+ # ------------------------------
193
+ async def _add_tracking_message(self, msg_id: str, delivery_tag: int, channel_number: Optional[int]):
194
+ """添加消息跟踪记录(原子操作)"""
195
+ async with self._tracking_lock:
196
+ self._tracking_messages[msg_id] = {
197
+ 'delivery_tag': delivery_tag,
198
+ 'acked': False,
199
+ 'channel_number': channel_number,
200
+ 'start_time': asyncio.get_event_loop().time()
201
+ }
202
+
203
+ async def _mark_tracking_acked(self, msg_id: str):
204
+ """标记消息已确认(原子操作)"""
205
+ async with self._tracking_lock:
206
+ if msg_id in self._tracking_messages:
207
+ self._tracking_messages[msg_id]['acked'] = True
208
+
209
+ async def _remove_tracking_message(self, msg_id: str):
210
+ """删除消息跟踪记录(原子操作,避免KeyError)"""
211
+ async with self._tracking_lock:
212
+ if msg_id in self._tracking_messages:
213
+ del self._tracking_messages[msg_id]
214
+ logger.info(f"已删除消息跟踪信息: {msg_id}")
215
+
216
+ async def _check_duplicate_message(self, msg_id: str) -> bool:
217
+ """检查消息是否重复处理(原子操作)"""
218
+ async with self._tracking_lock:
219
+ return msg_id in self._tracking_messages
220
+
221
+ async def _get_tracking_count(self) -> int:
222
+ """获取当前跟踪的消息数(原子操作)"""
223
+ async with self._tracking_lock:
224
+ return len(self._tracking_messages)
225
+
226
+ async def _cleanup_acked_tracking_messages(self) -> int:
227
+ """清理已确认的跟踪记录(原子操作,返回清理数量)"""
228
+ async with self._tracking_lock:
229
+ acked_ids = [
230
+ msg_id for msg_id, info in self._tracking_messages.items() if info.get('acked')]
231
+ for msg_id in acked_ids:
232
+ del self._tracking_messages[msg_id]
233
+ return len(acked_ids)
234
+
235
+ async def _clear_tracking_messages(self):
236
+ """清空所有跟踪记录(原子操作)"""
237
+ async with self._tracking_lock:
238
+ self._tracking_messages.clear()
239
+
240
+ # ------------------------------
241
+ # 基础工具方法
242
+ # ------------------------------
124
243
  async def _get_channel(self) -> AbstractChannel:
125
- """从通道池获取通道(使用上下文管理器)"""
244
+ """从通道池获取通道(使用上下文管理器,自动归还)"""
126
245
  if not self.connection_pool.channel_pool:
127
246
  raise Exception("连接池未初始化,请先调用init_pools")
128
247
 
@@ -161,7 +280,7 @@ class RabbitMQClient:
161
280
  return False
162
281
 
163
282
  async def _bind_queue(self, channel: AbstractChannel, queue: AbstractQueue, exchange: AbstractExchange) -> bool:
164
- """将队列绑定到交换机"""
283
+ """将队列绑定到交换机(带重试)"""
165
284
  bind_routing_key = self.routing_key if self.routing_key else '#'
166
285
 
167
286
  for attempt in range(MAX_RETRY_COUNT + 1):
@@ -174,7 +293,7 @@ class RabbitMQClient:
174
293
  timeout=self.rpc_timeout
175
294
  )
176
295
  logger.info(
177
- f"队列 '{self.queue_name}' 已绑定到交换机 '{self.exchange_name}',路由键: {bind_routing_key}")
296
+ f"队列 '{queue.name}' 已绑定到交换机 '{exchange.name}',路由键: {bind_routing_key}")
178
297
  return True
179
298
  except Exception as e:
180
299
  logger.warning(
@@ -183,6 +302,9 @@ class RabbitMQClient:
183
302
  await asyncio.sleep(1)
184
303
  return False
185
304
 
305
+ # ------------------------------
306
+ # 核心业务方法
307
+ # ------------------------------
186
308
  async def connect(self, force_reconnect: bool = False, declare_queue: bool = True) -> None:
187
309
  """从连接池获取资源并初始化(交换机、队列)"""
188
310
  logger.info(
@@ -190,32 +312,43 @@ class RabbitMQClient:
190
312
  f"declare_queue={declare_queue}, create_if_not_exists={self.create_if_not_exists}"
191
313
  )
192
314
 
193
- if self.is_connected and not force_reconnect:
315
+ # 检查是否已关闭
316
+ if await self._is_closed():
317
+ raise Exception("客户端已关闭,无法连接")
318
+
319
+ # 检查是否已连接(非强制重连则直接返回)
320
+ if await self.is_connected and not force_reconnect:
321
+ logger.info("已处于连接状态,无需重复连接")
194
322
  return
195
323
 
324
+ # 取消现有重连任务
196
325
  if self._reconnect_task and not self._reconnect_task.done():
197
326
  self._reconnect_task.cancel()
327
+ try:
328
+ await self._reconnect_task
329
+ except asyncio.CancelledError:
330
+ logger.info("旧重连任务已取消")
198
331
 
199
- # 重置状态
200
- self._exchange_exists = False
201
- self._queue_exists = False
202
- self._queue_bound = False
203
- self._set_is_consuming(False) # 连接时先停止消费
332
+ # 重置连接状态和跟踪记录
333
+ await self._reset_connection_state()
334
+ await self._clear_tracking_messages()
335
+ await self._set_consume_state(is_consuming=False)
204
336
 
205
337
  retries = 0
206
338
  last_exception = None
207
339
 
208
340
  while retries < self.max_reconnection_attempts:
209
341
  try:
210
- self.channel = await self._get_channel()
211
- await self.channel.set_qos(prefetch_count=self.prefetch_count)
342
+ # 获取新通道
343
+ channel = await self._get_channel()
344
+ await channel.set_qos(prefetch_count=self.prefetch_count)
212
345
 
213
346
  # 处理交换机
214
- exchange_exists = await self._check_exchange_exists(self.channel)
347
+ exchange_exists = await self._check_exchange_exists(channel)
215
348
  if not exchange_exists:
216
349
  if self.create_if_not_exists:
217
- self.exchange = await asyncio.wait_for(
218
- self.channel.declare_exchange(
350
+ exchange = await asyncio.wait_for(
351
+ channel.declare_exchange(
219
352
  name=self.exchange_name,
220
353
  type=self.exchange_type,
221
354
  durable=self.durable,
@@ -228,20 +361,21 @@ class RabbitMQClient:
228
361
  raise Exception(
229
362
  f"交换机 '{self.exchange_name}' 不存在且不允许自动创建")
230
363
  else:
231
- self.exchange = await self.channel.get_exchange(self.exchange_name)
364
+ exchange = await channel.get_exchange(self.exchange_name)
232
365
  logger.info(f"使用已存在的交换机 '{self.exchange_name}'")
233
366
 
234
367
  # 处理队列
368
+ queue = None
235
369
  if declare_queue and self.queue_name:
236
- queue_exists = await self._check_queue_exists(self.channel)
370
+ queue_exists = await self._check_queue_exists(channel)
237
371
 
238
372
  if not queue_exists:
239
373
  if not self.create_if_not_exists:
240
374
  raise Exception(
241
375
  f"队列 '{self.queue_name}' 不存在且不允许自动创建")
242
376
 
243
- self.queue = await asyncio.wait_for(
244
- self.channel.declare_queue(
377
+ queue = await asyncio.wait_for(
378
+ channel.declare_queue(
245
379
  name=self.queue_name,
246
380
  durable=self.durable,
247
381
  auto_delete=self.auto_delete,
@@ -249,28 +383,27 @@ class RabbitMQClient:
249
383
  ),
250
384
  timeout=self.rpc_timeout
251
385
  )
252
- self.actual_queue_name = self.queue_name
253
386
  logger.info(f"已创建队列 '{self.queue_name}'")
254
387
  else:
255
- self.queue = await self.channel.get_queue(self.queue_name)
256
- self.actual_queue_name = self.queue_name
388
+ queue = await channel.get_queue(self.queue_name)
257
389
  logger.info(f"使用已存在的队列 '{self.queue_name}'")
258
390
 
259
391
  # 绑定队列到交换机
260
- if self.queue and self.exchange:
261
- bound = await self._bind_queue(self.channel, self.queue, self.exchange)
392
+ if queue and exchange:
393
+ bound = await self._bind_queue(channel, queue, exchange)
262
394
  if not bound:
263
- raise Exception(f"队列 '{self.queue_name}' 绑定到交换机失败")
264
- else:
265
- self.queue = None
266
- self.actual_queue_name = None
267
- logger.info(f"跳过队列 '{self.queue_name}' 的声明和绑定")
395
+ raise Exception(f"队列 '{queue.name}' 绑定到交换机失败")
396
+
397
+ # 更新连接资源
398
+ await self._update_connection_resources(channel, exchange, queue)
268
399
 
269
- if not self.is_connected:
400
+ # 验证连接状态
401
+ if not await self.is_connected:
270
402
  raise Exception("连接验证失败,状态异常")
271
403
 
272
- # 重新开始消费(如果之前在消费)
273
- if self.message_handler:
404
+ # 重新开始消费(如果已设置处理器)
405
+ is_consuming, handler, _ = await self._get_consume_state()
406
+ if handler:
274
407
  await self.start_consuming()
275
408
 
276
409
  # 启动监控和保活任务
@@ -278,15 +411,12 @@ class RabbitMQClient:
278
411
  self._start_keepalive()
279
412
 
280
413
  self._update_activity_timestamp()
281
- # 清理可能残留的跟踪记录
282
- self._tracking_messages.clear()
283
414
  logger.info(f"RabbitMQ客户端初始化成功 (队列: {self.actual_queue_name})")
284
415
  return
285
416
 
286
417
  except Exception as e:
287
418
  last_exception = e
288
419
  logger.warning(f"资源初始化失败: {str(e)},重试中...")
289
- self.channel = None
290
420
  retries += 1
291
421
  if retries < self.max_reconnection_attempts:
292
422
  await asyncio.sleep(self.reconnection_delay)
@@ -296,112 +426,146 @@ class RabbitMQClient:
296
426
  f"经过{self.max_reconnection_attempts}次重试后仍无法初始化客户端。最后错误: {str(last_exception)}")
297
427
 
298
428
  def _start_monitoring(self) -> None:
299
- """启动连接和消费监控任务"""
300
- if self._closed or (self._monitor_task and not self._monitor_task.done()):
429
+ """启动连接和消费监控任务(无锁,仅通过原子方法访问状态)"""
430
+ if self._monitor_task and not self._monitor_task.done():
301
431
  return
302
432
 
303
433
  async def monitor():
304
- while not self._closed and self.channel:
434
+ while not await self._is_closed():
305
435
  try:
306
436
  # 检查通道状态
307
- if self.channel.is_closed:
437
+ channel, _, _ = await self._get_connection_resources()
438
+ if channel and channel.is_closed:
308
439
  logger.warning("检测到通道已关闭,尝试重建")
309
440
  await self._recreate_channel()
310
441
  continue
311
442
 
312
443
  current_time = asyncio.get_event_loop().time()
313
- # 清理消息跟踪记录
314
- if self._tracking_messages:
315
- # 1. 清理已确认的消息
316
- acked_ids = [
317
- msg_id for msg_id, info in self._tracking_messages.items()
318
- if info.get('acked', False)
319
- ]
320
- for msg_id in acked_ids:
321
- del self._tracking_messages[msg_id]
322
- if acked_ids:
323
- logger.info(f"清理了 {len(acked_ids)} 条已确认消息记录")
324
-
325
- # 检查消费停滞
326
- if self._is_consuming:
444
+
445
+ # 清理已确认的跟踪记录
446
+ cleaned_count = await self._cleanup_acked_tracking_messages()
447
+ if cleaned_count > 0:
448
+ logger.info(f"清理了 {cleaned_count} 条已确认消息记录")
449
+
450
+ # 检查消费停滞(仅当消费状态为True时)
451
+ is_consuming, _, _ = await self._get_consume_state()
452
+ if is_consuming:
453
+ tracking_count = await self._get_tracking_count()
327
454
  if current_time - self._last_message_processed > self.consumption_stall_threshold:
328
- if self._tracking_messages:
455
+ if tracking_count > 0:
329
456
  logger.warning(
330
- f"消费停滞,但有 {len(self._tracking_messages)} 个消息正在处理,暂不重启")
457
+ f"消费停滞,但有 {tracking_count} 个消息正在处理,暂不重启")
331
458
  else:
332
- # 无有效消息,重启消费
333
459
  logger.info("消费停滞且无消息处理,重启消费")
334
- await self.stop_consuming()
335
- await asyncio.sleep(1)
336
- await self.start_consuming()
460
+ try:
461
+ await self.stop_consuming()
462
+ await asyncio.sleep(1)
463
+ # 检查处理器是否存在
464
+ _, handler, _ = await self._get_consume_state()
465
+ if handler:
466
+ await self.start_consuming()
467
+ else:
468
+ logger.error("消费处理器已丢失,无法重启消费")
469
+ except Exception as e:
470
+ logger.error(
471
+ f"重启消费失败: {str(e)}", exc_info=True)
472
+ await self._set_consume_state(is_consuming=False)
337
473
 
338
474
  except Exception as e:
339
- logger.error(f"监控任务出错: {str(e)}")
340
- await asyncio.sleep(1)
475
+ logger.error(f"监控任务出错: {str(e)}", exc_info=True)
341
476
 
342
- await asyncio.sleep(60)
477
+ await asyncio.sleep(60) # 监控间隔60秒
343
478
 
344
479
  self._monitor_task = asyncio.create_task(monitor())
480
+ logger.info("监控任务已启动")
345
481
 
346
482
  async def _recreate_channel(self) -> None:
483
+ """重建通道并恢复资源(无锁嵌套)"""
484
+ # 先停止消费
485
+ await self._set_consume_state(is_consuming=False)
486
+ logger.info("开始重建通道...")
487
+
347
488
  try:
348
- self.channel = await self._get_channel()
349
- await self.channel.set_qos(prefetch_count=self.prefetch_count)
489
+ # 获取新通道
490
+ channel = await self._get_channel()
491
+ await channel.set_qos(prefetch_count=self.prefetch_count)
350
492
 
351
- # 重新获取交换机和队列
352
- self.exchange = await self.channel.get_exchange(self.exchange_name)
493
+ # 重新获取交换机
494
+ exchange = await channel.get_exchange(self.exchange_name)
495
+
496
+ # 重新获取队列并绑定
497
+ queue = None
353
498
  if self.queue_name:
354
- self.queue = await self.channel.get_queue(self.queue_name)
355
- if self.queue and self.exchange:
356
- await self._bind_queue(self.channel, self.queue, self.exchange)
499
+ queue = await channel.get_queue(self.queue_name)
500
+ if queue and exchange:
501
+ bound = await self._bind_queue(channel, queue, exchange)
502
+ if not bound:
503
+ raise Exception("队列绑定失败,通道重建不完整")
504
+
505
+ # 更新连接资源
506
+ await self._update_connection_resources(channel, exchange, queue)
357
507
 
358
508
  # 重新开始消费
359
- if self.message_handler:
509
+ _, handler, _ = await self._get_consume_state()
510
+ if handler:
360
511
  await self.start_consuming()
361
512
 
513
+ # 清空跟踪记录
514
+ await self._clear_tracking_messages()
362
515
  logger.info("通道已重建并恢复服务")
363
516
  self._update_activity_timestamp()
364
517
  except Exception as e:
365
- logger.error(f"通道重建失败: {str(e)},触发重连")
518
+ logger.error(f"通道重建失败: {str(e)},触发重连", exc_info=True)
519
+ await self._set_consume_state(is_consuming=False)
366
520
  await self.connect(force_reconnect=True)
367
521
 
368
522
  def _start_keepalive(self) -> None:
369
- """启动连接保活任务"""
370
- if self._closed or (self._keepalive_task and not self._keepalive_task.done()):
523
+ """启动连接保活任务(无锁,仅通过原子方法访问状态)"""
524
+ if self._keepalive_task and not self._keepalive_task.done():
371
525
  return
372
526
 
373
527
  async def keepalive():
374
- while not self._closed and self.is_connected:
375
- current_time = asyncio.get_event_loop().time()
376
- if current_time - self._last_activity_timestamp > self.connection_pool.heartbeat * 2:
377
- logger.info(
378
- f"连接 {self.connection_pool.heartbeat*2}s 无活动,执行保活检查")
379
- try:
380
- if self.channel.is_closed:
381
- logger.warning("连接已关闭,触发重连")
382
- await self.connect(force_reconnect=True)
383
- return
384
-
385
- # 轻量级操作保持连接活跃
386
- await asyncio.wait_for(
387
- self.channel.declare_exchange(
388
- name=self.exchange_name,
389
- type=self.exchange_type,
390
- passive=True
391
- ),
392
- timeout=5
393
- )
394
- self._update_activity_timestamp()
395
- except Exception as e:
396
- logger.warning(f"保活检查失败: {str(e)},触发重连")
528
+ while not await self._is_closed():
529
+ try:
530
+ # 检查连接状态
531
+ if not await self.is_connected:
532
+ logger.warning("保活任务检测到连接断开,触发重连")
397
533
  await self.connect(force_reconnect=True)
534
+ await asyncio.sleep(5)
535
+ continue
536
+
537
+ current_time = asyncio.get_event_loop().time()
538
+ # 检查活动时间
539
+ if current_time - self._last_activity_timestamp > self.connection_pool.heartbeat * 2:
540
+ logger.info(
541
+ f"连接 {self.connection_pool.heartbeat*2}s 无活动,执行保活检查")
542
+ channel, exchange, _ = await self._get_connection_resources()
543
+ if channel and not channel.is_closed and exchange:
544
+ # 轻量级操作:检查交换机是否存在
545
+ await asyncio.wait_for(
546
+ channel.declare_exchange(
547
+ name=self.exchange_name,
548
+ type=self.exchange_type,
549
+ passive=True
550
+ ),
551
+ timeout=5
552
+ )
553
+ self._update_activity_timestamp()
554
+ logger.info("保活检查成功")
555
+ else:
556
+ raise Exception("连接资源无效")
557
+
558
+ except Exception as e:
559
+ logger.warning(f"保活检查失败: {str(e)},触发重连")
560
+ await self.connect(force_reconnect=True)
398
561
 
399
562
  await asyncio.sleep(self.connection_pool.heartbeat)
400
563
 
401
564
  self._keepalive_task = asyncio.create_task(keepalive())
565
+ logger.info("保活任务已启动")
402
566
 
403
567
  async def _schedule_reconnect(self) -> None:
404
- """安排重新连接"""
568
+ """安排重新连接(无锁)"""
405
569
  if self._reconnect_task and not self._reconnect_task.done():
406
570
  return
407
571
 
@@ -410,38 +574,48 @@ class RabbitMQClient:
410
574
  async def reconnect():
411
575
  try:
412
576
  await asyncio.sleep(self.reconnection_delay)
413
- if not self._closed:
577
+ if not await self._is_closed():
414
578
  await self.connect(force_reconnect=True)
415
579
  except Exception as e:
416
580
  logger.error(f"重连任务失败: {str(e)}")
417
- if not self._closed:
581
+ if not await self._is_closed():
418
582
  await self._schedule_reconnect()
419
583
 
420
584
  self._reconnect_task = asyncio.create_task(reconnect())
421
585
 
422
586
  async def close(self) -> None:
423
- """关闭客户端并释放资源"""
424
- self._closed = True
425
- self._set_is_consuming(False)
587
+ """关闭客户端并释放资源(原子操作,无锁嵌套)"""
588
+ if await self._is_closed():
589
+ logger.info("客户端已关闭,无需重复操作")
590
+ return
591
+
592
+ logger.info("开始关闭RabbitMQ客户端...")
593
+
594
+ # 标记为已关闭
595
+ await self._mark_closed()
596
+
597
+ # 停止消费
598
+ await self.stop_consuming()
426
599
 
427
- # 取消所有任务
428
- for task in [self._keepalive_task, self._reconnect_task,
429
- self._consuming_task, self._monitor_task]:
600
+ # 取消所有后台任务
601
+ tasks = [self._keepalive_task,
602
+ self._reconnect_task, self._monitor_task]
603
+ for task in tasks:
430
604
  if task and not task.done():
431
605
  task.cancel()
432
606
  try:
433
607
  await task
434
608
  except asyncio.CancelledError:
435
- pass
609
+ logger.info(f"任务 {task.get_name()} 已取消")
436
610
 
437
- # 重置状态
438
- self.channel = None
439
- self.exchange = None
440
- self.queue = None
441
- self._consumer_tag = None
442
- self._tracking_messages.clear()
611
+ # 重置所有状态和资源
612
+ await self._reset_connection_state()
613
+ await self._clear_tracking_messages()
614
+ async with self._consume_state_lock:
615
+ self.message_handler = None
616
+ self._consumer_tag = None
443
617
 
444
- logger.info("RabbitMQ客户端已关闭")
618
+ logger.info("RabbitMQ客户端已完全关闭")
445
619
 
446
620
  async def publish(
447
621
  self,
@@ -451,13 +625,14 @@ class RabbitMQClient:
451
625
  headers: Optional[Dict[str, Any]] = None,
452
626
  delivery_mode: DeliveryMode = DeliveryMode.PERSISTENT
453
627
  ) -> None:
454
- """发布消息(从池获取通道,自动重试)"""
455
- if not self.is_connected:
456
- logger.warning("连接已关闭,尝试重连后发布消息")
457
- await self.connect(force_reconnect=True)
628
+ """发布消息(从池获取通道,自动重试,无锁冲突)"""
629
+ if await self._is_closed():
630
+ raise Exception("客户端已关闭,无法发布消息")
458
631
 
459
- if not self.channel or not self.exchange:
460
- raise Exception("RabbitMQ连接未初始化")
632
+ # 检查连接状态
633
+ if not await self.is_connected:
634
+ logger.warning("连接已断开,尝试重连后发布消息")
635
+ await self.connect(force_reconnect=True)
461
636
 
462
637
  # 处理消息体
463
638
  if isinstance(message_body, dict):
@@ -492,13 +667,12 @@ class RabbitMQClient:
492
667
  raise Exception("消息未被服务器确认接收")
493
668
 
494
669
  self._update_activity_timestamp()
495
- logger.info(f"消息已发布到交换机 '{self.exchange_name}'")
670
+ logger.info(
671
+ f"消息已发布到交换机 '{self.exchange_name}'(路由键: {routing_key or self.routing_key or '#'})")
496
672
  return
497
673
  except (ConnectionClosed, ChannelInvalidStateError, asyncio.TimeoutError):
498
- # 覆盖更多异常类型
499
674
  retry_count += 1
500
675
  logger.warning(f"连接异常,尝试重连后重新发布 (重试次数: {retry_count})")
501
- # 主动刷新连接状态
502
676
  await self.connect(force_reconnect=True)
503
677
  except Exception as e:
504
678
  retry_count += 1
@@ -508,85 +682,113 @@ class RabbitMQClient:
508
682
 
509
683
  raise Exception(f"消息发布失败,经过{retry_count}次重试仍未成功")
510
684
 
511
- def set_message_handler(self, handler):
512
- self.message_handler = handler
685
+ async def _safe_cancel_consumer(self, consumer_tag: ConsumerTag, queue: AbstractQueue) -> bool:
686
+ """安全取消消费者(无锁,仅操作传入的局部变量)"""
687
+ try:
688
+ await asyncio.wait_for(
689
+ queue.cancel(consumer_tag),
690
+ timeout=self.rpc_timeout
691
+ )
692
+ logger.info(f"消费者 {consumer_tag} 已取消")
693
+ return True
694
+ except Exception as e:
695
+ logger.error(f"取消消费者 {consumer_tag} 异常: {str(e)}")
696
+ return False
513
697
 
514
698
  async def start_consuming(self) -> ConsumerTag:
515
- if self._is_consuming:
516
- logger.info("已经在消费中,返回现有consumer_tag")
517
- if self._consumer_tag:
518
- return self._consumer_tag
519
- logger.warning("检测到消费状态异常(无consumer_tag),重置状态后重试")
520
- self._set_is_consuming(False)
521
-
522
- if not self.is_connected:
699
+ """启动消费(无锁嵌套,通过原子方法获取/更新状态)"""
700
+ # 检查客户端状态
701
+ if await self._is_closed():
702
+ raise Exception("客户端已关闭,无法启动消费")
703
+
704
+ # 检查连接状态
705
+ if not await self.is_connected:
523
706
  await self.connect()
524
707
 
525
- if not self.queue:
526
- raise Exception("队列未初始化,无法开始消费")
708
+ # 获取消费状态和资源
709
+ is_consuming, handler, consumer_tag = await self._get_consume_state()
710
+ channel, exchange, queue = await self._get_connection_resources()
711
+
712
+ # 检查是否已在消费
713
+ if is_consuming and consumer_tag:
714
+ logger.info(f"已经在消费中,返回现有consumer_tag: {consumer_tag}")
715
+ return consumer_tag
527
716
 
528
- if not self.message_handler:
529
- raise Exception("未设置消息处理函数")
717
+ # 检查必要条件
718
+ if not handler:
719
+ raise Exception("未设置消息处理函数,请先调用set_message_handler")
720
+ if not queue:
721
+ raise Exception("队列未初始化,无法开始消费")
722
+ if not channel or channel.is_closed:
723
+ raise Exception("通道无效,无法开始消费")
530
724
 
531
725
  try:
532
- self._consumer_tag = await self.queue.consume(
726
+ # 启动消费
727
+ new_consumer_tag = await queue.consume(
533
728
  self._message_wrapper,
534
729
  no_ack=False # 手动确认消息
535
730
  )
536
731
 
537
- if not self._consumer_tag:
732
+ if not new_consumer_tag:
538
733
  raise Exception("未能获取到有效的consumer_tag")
539
734
 
540
- self._set_is_consuming(True)
735
+ # 更新消费状态
736
+ await self._set_consume_state(is_consuming=True, consumer_tag=new_consumer_tag)
541
737
  logger.info(
542
- f"消费者已启动,队列: {self.actual_queue_name}, tag: {self._consumer_tag}")
543
- return self._consumer_tag
738
+ f"消费者已启动,队列: {queue.name}, tag: {new_consumer_tag}")
739
+ return new_consumer_tag
544
740
  except Exception as e:
545
- self._set_is_consuming(False)
741
+ # 异常时回滚状态
742
+ await self._set_consume_state(is_consuming=False)
546
743
  logger.error(f"启动消费失败: {str(e)}", exc_info=True)
547
744
  raise
548
745
 
549
- async def _safe_cancel_consumer(self) -> bool:
550
- if not self._consumer_tag or not self.queue or not self.channel:
551
- return True
552
-
553
- try:
554
- await asyncio.wait_for(
555
- self.queue.cancel(self._consumer_tag),
556
- timeout=self.rpc_timeout
557
- )
558
- logger.info(f"消费者 {self._consumer_tag} 已取消")
559
- return True
560
- except Exception as e:
561
- logger.error(f"取消消费者异常: {str(e)}")
562
- return False
563
-
564
746
  async def stop_consuming(self) -> None:
565
- if not self._is_consuming:
747
+ """停止消费(无锁嵌套,通过原子方法获取/更新状态)"""
748
+ # 获取消费状态和资源
749
+ is_consuming, _, consumer_tag = await self._get_consume_state()
750
+ _, _, queue = await self._get_connection_resources()
751
+
752
+ if not is_consuming:
753
+ logger.info("未处于消费状态,无需停止")
566
754
  return
567
755
 
568
- self._set_is_consuming(False)
756
+ logger.info(f"开始停止消费(consumer_tag: {consumer_tag})")
757
+
758
+ # 先更新消费状态为False
759
+ await self._set_consume_state(is_consuming=False)
569
760
 
570
- if self._consumer_tag and self.queue:
571
- await self._safe_cancel_consumer()
761
+ # 取消消费者
762
+ if consumer_tag and queue and not await self._is_closed():
763
+ await self._safe_cancel_consumer(consumer_tag, queue)
572
764
 
573
765
  # 等待所有正在处理的消息完成
574
- if self._tracking_messages:
575
- logger.info(f"等待 {len(self._tracking_messages)} 个正在处理的消息完成...")
766
+ tracking_count = await self._get_tracking_count()
767
+ if tracking_count > 0:
768
+ logger.info(f"等待 {tracking_count} 个正在处理的消息完成...")
576
769
  wait_start = asyncio.get_event_loop().time()
577
- while self._tracking_messages and not self._closed:
578
- if asyncio.get_event_loop().time() - wait_start > 30: # 最多等30秒
579
- logger.warning("等待消息处理超时,强制清理跟踪记录")
580
- self._tracking_messages.clear()
770
+ while True:
771
+ # 检查是否超时或已关闭
772
+ if await self._is_closed() or asyncio.get_event_loop().time() - wait_start > 30:
773
+ timeout = asyncio.get_event_loop().time() - wait_start > 30
774
+ if timeout:
775
+ logger.warning("等待消息处理超时,强制清理跟踪记录")
776
+ await self._clear_tracking_messages()
777
+ break
778
+ # 检查跟踪记录是否为空
779
+ current_count = await self._get_tracking_count()
780
+ if current_count == 0:
581
781
  break
582
782
  await asyncio.sleep(1)
583
783
 
584
- # 清理状态
585
- self._consumer_tag = None
586
- self._tracking_messages.clear()
587
- logger.info(f"已停止消费队列: {self.actual_queue_name}")
784
+ # 清理消费状态
785
+ async with self._consume_state_lock:
786
+ self._consumer_tag = None
787
+
788
+ logger.info(f"已停止消费队列: {queue.name if queue else '未知'}")
588
789
 
589
790
  async def _parse_message(self, message: AbstractIncomingMessage) -> Union[Dict[str, Any], str]:
791
+ """解析消息体(无锁,仅处理局部变量)"""
590
792
  try:
591
793
  body_str = message.body.decode('utf-8')
592
794
  self._update_activity_timestamp()
@@ -595,69 +797,82 @@ class RabbitMQClient:
595
797
  return json.loads(body_str)
596
798
  return body_str
597
799
  except json.JSONDecodeError:
598
- logger.warning(f"消息解析JSON失败,返回原始字符串")
800
+ logger.warning(
801
+ f"消息 {message.message_id or id(message)} 解析JSON失败,返回原始字符串")
599
802
  return body_str
600
803
  except Exception as e:
601
- logger.error(f"消息解析出错: {str(e)}")
804
+ logger.error(
805
+ f"消息 {message.message_id or id(message)} 解析出错: {str(e)}")
602
806
  return message.body.decode('utf-8')
603
807
 
604
- async def _message_wrapper(self, message: AbstractIncomingMessage) -> None:
605
- if not self.message_handler or not self._is_consuming:
606
- logger.warning("未设置消息处理器或已停止消费,拒绝消息,重新放到队列")
607
- try:
608
- await message.reject(requeue=True)
609
- except Exception as e:
610
- logger.error(f"拒绝消息失败: {e}")
611
- return
612
-
808
+ async def _handle_business_retry(
809
+ self,
810
+ message: AbstractIncomingMessage,
811
+ error: Exception,
812
+ drop: bool = True
813
+ ) -> None:
814
+ """
815
+ 封装业务失败重试逻辑:更新重试计数Header,延迟3秒重新发布
816
+ 达到最大次数则标记失败(无锁,仅通过原子方法操作跟踪记录)
817
+ """
818
+ # 获取当前重试次数
819
+ current_headers = message.headers or {}
820
+ retry_count = current_headers.get('x-retry-count', 0)
821
+ retry_count += 1
613
822
  message_id = message.message_id or str(id(message))
614
- if message_id in self._tracking_messages:
615
- logger.warning(f"检测到重复处理的消息ID: {message_id},直接确认")
616
- await message.ack()
617
- return
618
823
 
619
- start_time = asyncio.get_event_loop().time()
620
- self._tracking_messages[message_id] = {
621
- 'delivery_tag': message.delivery_tag,
622
- 'acked': False,
623
- 'channel_number': self.channel.number if self.channel else None,
624
- 'start_time': start_time
625
- }
824
+ error_msg = f"[{type(error).__name__}] {str(error)}"[:200]
626
825
 
627
- try:
628
- logger.info(f"收到队列 {self.actual_queue_name} 的消息: {message_id}")
629
- print(f"收到队列 {self.actual_queue_name} 的消息: {message_id}")
630
-
631
- parsed_data = await self._parse_message(message)
632
- await self.message_handler(MQMsgModel(** parsed_data), message)
826
+ # 打印错误日志
827
+ logger.error(
828
+ f"消息 {message_id} 处理出错(第{retry_count}次重试): {error_msg}",
829
+ exc_info=True
830
+ )
633
831
 
832
+ # 达到最大重试次数:ack标记失败
833
+ if drop and retry_count >= MAX_RETRY_COUNT:
834
+ logger.error(
835
+ f"消息 {message_id} 已达到最大重试次数{MAX_RETRY_COUNT},标记为失败")
836
+ # 标记跟踪记录为已确认
837
+ await self._mark_tracking_acked(message_id)
634
838
  await message.ack()
635
- self._tracking_messages[message_id]['acked'] = True
636
839
  self._update_activity_timestamp()
637
- self._update_message_processed_timestamp()
638
- logger.info(f"消息 {message_id} 处理完成并确认")
840
+ return
639
841
 
640
- except Exception as e:
641
- current_headers = message.headers or {}
642
- retry_count = current_headers.get('x-retry-count', 0)
643
- retry_count += 1
842
+ # 构造新消息Header
843
+ new_headers = current_headers.copy()
844
+ new_headers['x-retry-count'] = retry_count
845
+ new_headers['x-retry-error'] = error_msg
644
846
 
645
- logger.error(
646
- f"消息 {message_id} 处理出错(第{retry_count}次重试): {str(e)}",
647
- exc_info=True
648
- )
847
+ # 提交异步任务,延迟3秒后重新发布
848
+ asyncio.create_task(
849
+ self._delayed_republish(
850
+ message, new_headers, retry_count, message_id)
851
+ )
649
852
 
650
- if retry_count >= MAX_RETRY_COUNT:
651
- logger.error(
652
- f"消息 {message_id} 已达到最大重试次数{MAX_RETRY_COUNT},标记为失败")
653
- await message.ack()
654
- self._tracking_messages[message_id]['acked'] = True
655
- self._update_activity_timestamp()
853
+ async def _delayed_republish(
854
+ self,
855
+ message: AbstractIncomingMessage,
856
+ new_headers: Dict[str, Any],
857
+ retry_count: int,
858
+ message_id: str
859
+ ) -> None:
860
+ """延迟发布重试消息(无锁,仅通过原子方法操作资源)"""
861
+ try:
862
+ # 延迟3秒重试
863
+ await asyncio.sleep(3)
864
+
865
+ # 检查客户端状态
866
+ if await self._is_closed():
867
+ logger.warning(f"客户端已关闭,放弃消息 {message_id} 的重试发布")
656
868
  return
657
869
 
658
- new_headers = current_headers.copy()
659
- new_headers['x-retry-count'] = retry_count
870
+ # 获取交换机
871
+ _, exchange, _ = await self._get_connection_resources()
872
+ if not exchange:
873
+ raise Exception("交换机未初始化,无法发布重试消息")
660
874
 
875
+ # 构造新消息
661
876
  new_message = Message(
662
877
  body=message.body,
663
878
  content_type=message.content_type,
@@ -665,22 +880,97 @@ class RabbitMQClient:
665
880
  delivery_mode=message.delivery_mode
666
881
  )
667
882
 
883
+ # 重新发布消息
884
+ await exchange.publish(
885
+ new_message,
886
+ routing_key=self.routing_key or '#',
887
+ mandatory=True,
888
+ timeout=5.0
889
+ )
890
+ self._update_activity_timestamp()
891
+ logger.info(f"消息 {message_id} 已重新发布,当前重试次数: {retry_count}")
892
+
893
+ # 拒绝原始消息(不重新入队)
668
894
  await message.reject(requeue=False)
669
- self._tracking_messages[message_id]['acked'] = True
670
-
671
- if self.exchange:
672
- await self.exchange.publish(
673
- new_message,
674
- routing_key=self.routing_key or '#',
675
- mandatory=True,
676
- timeout=5.0
895
+ # 标记跟踪记录为已确认
896
+ await self._mark_tracking_acked(message_id)
897
+
898
+ except Exception as e:
899
+ logger.error(
900
+ f"消息 {message_id} 延迟发布失败(错误:{str(e)}),触发requeue兜底",
901
+ exc_info=True
902
+ )
903
+ # 发布失败兜底:requeue原始消息
904
+ await message.reject(requeue=True)
905
+
906
+ async def _message_wrapper(self, message: AbstractIncomingMessage) -> None:
907
+ """消息处理包装器(无锁嵌套,仅通过原子方法操作状态)"""
908
+ message_id = message.message_id or str(id(message))
909
+ max_check_attempts = 3
910
+ check_interval = 1
911
+
912
+ # 重试检查消费状态(处理极端并发场景)
913
+ for attempt in range(max_check_attempts):
914
+ is_consuming, handler, _ = await self._get_consume_state()
915
+ if is_consuming and handler:
916
+ break
917
+ if attempt < max_check_attempts - 1:
918
+ logger.debug(
919
+ f"消息 {message_id} 处理状态检查重试(第{attempt+1}次): "
920
+ f"handler={'存在' if handler else '不存在'}, "
921
+ f"is_consuming={is_consuming}"
677
922
  )
678
- self._update_activity_timestamp()
679
- logger.info(f"消息 {message_id} 已重新发布,当前重试次数: {retry_count}")
923
+ await asyncio.sleep(check_interval)
924
+
925
+ # 最终状态判断:状态异常则拒绝消息
926
+ is_consuming, handler, _ = await self._get_consume_state()
927
+ if not is_consuming or not handler:
928
+ err_msg = f"消息 {message_id} 拒绝处理:handler={'存在' if handler else '不存在'}, is_consuming={is_consuming}"
929
+ logger.warning(err_msg)
930
+ try:
931
+ await self._handle_business_retry(message, Exception(err_msg), drop=False)
932
+ except Exception as e:
933
+ logger.error(f"消息 {message_id} 拒绝处理失败: {e}")
934
+ return
935
+
936
+ # 检查重复处理
937
+ if await self._check_duplicate_message(message_id):
938
+ logger.warning(f"检测到重复处理的消息ID: {message_id},直接确认")
939
+ await message.ack()
940
+ return
941
+
942
+ # 添加跟踪记录
943
+ channel, _, _ = await self._get_connection_resources()
944
+ channel_number = channel.number if channel else None
945
+ await self._add_tracking_message(message_id, message.delivery_tag, channel_number)
946
+
947
+ try:
948
+ logger.info(f"收到队列 {self.actual_queue_name} 的消息: {message_id}")
949
+
950
+ # 解析消息
951
+ parsed_data = await self._parse_message(message)
952
+ # 转换为MQMsgModel
953
+ if isinstance(parsed_data, dict):
954
+ msg_model = MQMsgModel(**parsed_data)
955
+ else:
956
+ msg_model = MQMsgModel(data=parsed_data)
957
+
958
+ # 调用业务处理器
959
+ await handler(msg_model, message)
960
+
961
+ # 处理成功:标记跟踪记录并确认消息
962
+ await self._mark_tracking_acked(message_id)
963
+ await message.ack()
964
+ self._update_activity_timestamp()
965
+ self._update_message_processed_timestamp()
966
+ logger.info(f"消息 {message_id} 处理完成并确认")
967
+
968
+ except Exception as e:
969
+ # 业务处理失败:触发重试逻辑
970
+ await self._handle_business_retry(message, e)
680
971
  finally:
681
- if message_id in self._tracking_messages:
682
- del self._tracking_messages[message_id]
683
- logger.info(f"已删除消息跟踪信息: {message_id}")
972
+ # 清理跟踪记录
973
+ await self._remove_tracking_message(message_id)
684
974
 
685
975
  async def __aenter__(self):
686
976
  await self.connect()