sycommon-python-lib 0.1.57b1__py3-none-any.whl → 0.1.57b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sycommon/llm/embedding.py CHANGED
@@ -1,8 +1,7 @@
 import asyncio
-import json
 import aiohttp
-from typing import Union, List, Optional
-
+import atexit
+from typing import Union, List, Optional, Dict
 from sycommon.config.Config import SingletonMeta
 from sycommon.config.EmbeddingConfig import EmbeddingConfig
 from sycommon.config.RerankerConfig import RerankerConfig
@@ -23,12 +22,78 @@ class Embedding(metaclass=SingletonMeta):
         self.reranker_base_url = RerankerConfig.from_config(
             self.default_reranker_model).baseUrl

+        # [fix] Cache config URLs to avoid re-reading the config file under high concurrency
+        self._embedding_url_cache: Dict[str, str] = {
+            self.default_embedding_model: self.embeddings_base_url
+        }
+        self._reranker_url_cache: Dict[str, str] = {
+            self.default_reranker_model: self.reranker_base_url
+        }
+
         # Concurrency semaphore
         self.semaphore = asyncio.Semaphore(self.max_concurrency)
-        # Global default timeout: never time out (None)
         self.default_timeout = aiohttp.ClientTimeout(total=None)

-    async def _get_embeddings_http_async(
+        # Core optimization: create a globally reusable ClientSession (connection pool reuse)
+        self.session = None
+
+        # [fix] Register an exit hook so the connection pool is closed on shutdown
+        atexit.register(self._sync_close_session)
+
+    async def init_session(self):
+        """Initialize the global ClientSession (created only once)"""
+        if self.session is None or self.session.closed:
+            # Configure connection pool parameters for high concurrency
+            connector = aiohttp.TCPConnector(
+                limit=self.max_concurrency,  # max connections in the pool
+                limit_per_host=self.max_concurrency,  # max connections per host
+                ttl_dns_cache=300,  # DNS cache TTL
+                enable_cleanup_closed=True  # automatically clean up closed connections
+            )
+            self.session = aiohttp.ClientSession(
+                connector=connector,
+                timeout=self.default_timeout
+            )
+
+    async def close_session(self):
+        """Close the global session (called on program exit)"""
+        if self.session and not self.session.closed:
+            await self.session.close()
+
+    def _sync_close_session(self):
+        """Synchronous wrapper around close_session, for atexit"""
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                # [fix] Corrected indentation so exceptions from create_task are caught
+                try:
+                    loop.create_task(self.close_session())
+                except Exception:
+                    pass
+            else:
+                try:
+                    loop.run_until_complete(self.close_session())
+                except Exception:
+                    pass
+        except Exception:
+            # Swallow exceptions raised while getting the loop
+            pass
+
+    def _get_embedding_url(self, model: str) -> str:
+        """Get the embedding URL (cached)"""
+        if model not in self._embedding_url_cache:
+            self._embedding_url_cache[model] = EmbeddingConfig.from_config(
+                model).baseUrl
+        return self._embedding_url_cache[model]
+
+    def _get_reranker_url(self, model: str) -> str:
+        """Get the reranker URL (cached)"""
+        if model not in self._reranker_url_cache:
+            self._reranker_url_cache[model] = RerankerConfig.from_config(
+                model).baseUrl
+        return self._reranker_url_cache[model]
+
+    async def _get_embeddings_http_core(
         self,
         input: Union[str, List[str]],
         encoding_format: str = None,
@@ -36,13 +101,14 @@ class Embedding(metaclass=SingletonMeta):
         timeout: aiohttp.ClientTimeout = None,
         **kwargs
     ):
+        """Core logic for embedding requests"""
+        await self.init_session()  # make sure the session is initialized
         async with self.semaphore:
-            # Prefer the caller-supplied timeout, falling back to the global default
             request_timeout = timeout or self.default_timeout
-
-            # Prefer the caller-supplied model name, falling back to the default
             target_model = model or self.default_embedding_model
-            target_base_url = EmbeddingConfig.from_config(target_model).baseUrl
+
+            # [fix] Resolve the URL through the cache
+            target_base_url = self._get_embedding_url(target_model)
             url = f"{target_base_url}/v1/embeddings"

             request_body = {
@@ -52,25 +118,48 @@ class Embedding(metaclass=SingletonMeta):
             }
             request_body.update(kwargs)

+            # Reuse the global session
             try:
-                async with aiohttp.ClientSession(timeout=request_timeout) as session:
-                    async with session.post(url, json=request_body) as response:
-                        if response.status != 200:
-                            error_detail = await response.text()
-                            SYLogger.error(
-                                f"Embedding request failed (model: {target_model}): {error_detail}")
-                            return None
-                        return await response.json()
-            except asyncio.TimeoutError:
+                async with self.session.post(
+                    url,
+                    json=request_body,
+                    timeout=request_timeout
+                ) as response:
+                    if response.status != 200:
+                        error_detail = await response.text()
+                        # [log] Record the detailed HTTP error response
+                        SYLogger.error(
+                            f"Embedding request HTTP Error. Status: {response.status}, "
+                            f"Model: {target_model}, URL: {url}. Detail: {error_detail}"
+                        )
+                        return None
+                    return await response.json()
+            except (aiohttp.ClientConnectionResetError, asyncio.TimeoutError, aiohttp.ClientError) as e:
+                # [log] Record the network error
                 SYLogger.error(
-                    f"Embedding request timeout (model: {target_model})")
+                    f"Embedding request Network Error. Model: {target_model}, URL: {url}. "
+                    f"Error: {e.__class__.__name__} - {str(e)}"
+                )
                 return None
             except Exception as e:
+                # Record other unexpected exceptions
                 SYLogger.error(
-                    f"Embedding request unexpected error (model: {target_model}): {str(e)}")
+                    f"Unexpected error in _get_embeddings_http_core: {str(e)}", exc_info=True)
                 return None

-    async def _get_reranker_http_async(
+    async def _get_embeddings_http_async(
+        self,
+        input: Union[str, List[str]],
+        encoding_format: str = None,
+        model: str = None,
+        timeout: aiohttp.ClientTimeout = None, ** kwargs
+    ):
+        """Externally exposed embedding request method"""
+        return await self._get_embeddings_http_core(
+            input, encoding_format, model, timeout, ** kwargs
+        )
+
+    async def _get_reranker_http_core(
         self,
         documents: List[str],
         query: str,
@@ -79,16 +168,16 @@ class Embedding(metaclass=SingletonMeta):
         max_chunks_per_doc: Optional[int] = None,
         return_documents: Optional[bool] = True,
         return_len: Optional[bool] = True,
-        timeout: aiohttp.ClientTimeout = None,
-        **kwargs
+        timeout: aiohttp.ClientTimeout = None, ** kwargs
     ):
+        """Core logic for reranker requests"""
+        await self.init_session()  # make sure the session is initialized
         async with self.semaphore:
-            # Prefer the caller-supplied timeout, falling back to the global default
             request_timeout = timeout or self.default_timeout
-
-            # Prefer the caller-supplied model name, falling back to the default
             target_model = model or self.default_reranker_model
-            target_base_url = RerankerConfig.from_config(target_model).baseUrl
+
+            # [fix] Resolve the URL through the cache
+            target_base_url = self._get_reranker_url(target_model)
             url = f"{target_base_url}/v1/rerank"

             request_body = {
@@ -99,28 +188,66 @@ class Embedding(metaclass=SingletonMeta):
                 "max_chunks_per_doc": max_chunks_per_doc,
                 "return_documents": return_documents,
                 "return_len": return_len,
-                "kwargs": json.dumps(kwargs),
             }
             request_body.update(kwargs)

+            # Reuse the global session
             try:
-                async with aiohttp.ClientSession(timeout=request_timeout) as session:
-                    async with session.post(url, json=request_body) as response:
-                        if response.status != 200:
-                            error_detail = await response.text()
-                            SYLogger.error(
-                                f"Rerank request failed (model: {target_model}): {error_detail}")
-                            return None
-                        return await response.json()
-            except asyncio.TimeoutError:
+                async with self.session.post(
+                    url,
+                    json=request_body,
+                    timeout=request_timeout
+                ) as response:
+                    if response.status != 200:
+                        error_detail = await response.text()
+                        # [log] Record the detailed HTTP error response
+                        SYLogger.error(
+                            f"Reranker request HTTP Error. Status: {response.status}, "
+                            f"Model: {target_model}, URL: {url}. Detail: {error_detail}"
+                        )
+                        return None
+                    return await response.json()
+            except (aiohttp.ClientConnectionResetError, asyncio.TimeoutError, aiohttp.ClientError) as e:
+                # [log] Record the network error
                 SYLogger.error(
-                    f"Rerank request timeout (model: {target_model})")
+                    f"Reranker request Network Error. Model: {target_model}, URL: {url}. "
+                    f"Error: {e.__class__.__name__} - {str(e)}"
+                )
                 return None
             except Exception as e:
+                # Record other unexpected exceptions
                 SYLogger.error(
-                    f"Rerank request unexpected error (model: {target_model}): {str(e)}")
+                    f"Unexpected error in _get_reranker_http_core: {str(e)}", exc_info=True)
                 return None

+    async def _get_reranker_http_async(
+        self,
+        documents: List[str],
+        query: str,
+        top_n: Optional[int] = None,
+        model: str = None,
+        max_chunks_per_doc: Optional[int] = None,
+        return_documents: Optional[bool] = True,
+        return_len: Optional[bool] = True,
+        timeout: aiohttp.ClientTimeout = None, ** kwargs
+    ):
+        """Externally exposed reranker request method"""
+        return await self._get_reranker_http_core(
+            documents, query, top_n, model, max_chunks_per_doc,
+            return_documents, return_len, timeout, **kwargs
+        )
+
+    def _get_dimension(self, model: str) -> int:
+        """Get the model dimension, used to build fallback zero vectors"""
+        try:
+            config = EmbeddingConfig.from_config(model)
+            if hasattr(config, 'dimension'):
+                return int(config.dimension)
+        except Exception:
+            pass
+        # Fall back to 1024 by default
+        return 1024
+
     async def get_embeddings(
         self,
         corpus: List[str],
@@ -145,28 +272,52 @@ class Embedding(metaclass=SingletonMeta):
             SYLogger.warning(
                 f"Invalid timeout type: {type(timeout)}, must be int/float, use default timeout")

+        actual_model = model or self.default_embedding_model
+
         SYLogger.info(
-            f"Requesting embeddings for corpus: {corpus} (model: {model or self.default_embedding_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
-
-        # Pass the model name and timeout into each async task
-        tasks = [self._get_embeddings_http_async(
-            text, model=model, timeout=request_timeout) for text in corpus]
-        results = await asyncio.gather(*tasks)
-
-        vectors = []
-        for result in results:
-            if result is None:
-                zero_vector = [0.0] * 1024
-                vectors.append(zero_vector)
-                SYLogger.warning(
-                    f"Embedding request failed, append zero vector (1024D)")
-                continue
-            for item in result["data"]:
-                vectors.append(item["embedding"])
+            f"Requesting embeddings for corpus: {len(corpus)} items (model: {actual_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
+
+        all_vectors = []
+
+        # [fix] Chunk the corpus so a huge input cannot exhaust memory or spawn too many coroutines
+        # Process at most max_concurrency * 2 requests per batch instead of creating
+        # hundreds of thousands of coroutines at once
+        batch_size = self.max_concurrency * 2
+
+        for i in range(0, len(corpus), batch_size):
+            batch_texts = corpus[i: i + batch_size]
+
+            SYLogger.info(
+                f"Requesting embeddings for text: {len(batch_texts)} items (model: {actual_model}, timeout: {timeout or 'None'})")
+
+            # Pass the model name and timeout into each async task
+            tasks = [self._get_embeddings_http_async(
+                text, model=actual_model, timeout=request_timeout) for text in batch_texts]
+            results = await asyncio.gather(*tasks)
+
+            for result in results:
+                if result is None:
+                    dim = self._get_dimension(actual_model)
+
+                    zero_vector = [0.0] * dim
+                    all_vectors.append(zero_vector)
+                    # [log] Make it explicit that a zero vector is appended
+                    SYLogger.warning(
+                        f"Embedding request failed (returned None), appending zero vector ({dim}D) for model {actual_model}")
+                    continue
+
+                # Extract vectors from the response
+                try:
+                    for item in result["data"]:
+                        embedding = item["embedding"]
+                        all_vectors.append(embedding)
+                except (KeyError, TypeError) as e:
+                    SYLogger.error(f"Failed to parse embedding result: {e}")
+                    dim = self._get_dimension(actual_model)
+                    all_vectors.append([0.0] * dim)

         SYLogger.info(
-            f"Embeddings for corpus: {corpus} created (model: {model or self.default_embedding_model})")
-        return vectors
+            f"Embeddings for corpus created: {len(all_vectors)} vectors (model: {actual_model})")
+        return all_vectors

     async def get_reranker(
         self,
@@ -194,11 +345,14 @@ class Embedding(metaclass=SingletonMeta):
             SYLogger.warning(
                 f"Invalid timeout type: {type(timeout)}, must be int/float, use default timeout")

+        actual_model = model or self.default_reranker_model
         SYLogger.info(
-            f"Requesting reranker for top_results: {top_results} (model: {model or self.default_reranker_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
-
+            f"Requesting reranker for top_results: {top_results} (model: {actual_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
+        # Log the request parameters
+        SYLogger.info(
+            f"Requesting reranker for top_results: {top_results} (model: {actual_model}) (query: {query}) (timeout: {timeout or 'None'})")
         data = await self._get_reranker_http_async(
-            top_results, query, model=model, timeout=request_timeout)
+            top_results, query, model=actual_model, timeout=request_timeout)
         SYLogger.info(
-            f"Reranker for top_results: {top_results} completed (model: {model or self.default_reranker_model})")
+            f"Reranker for top_results completed (model: {actual_model})")
         return data
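
With these changes, embedding and rerank calls share one ClientSession and its connection pool instead of opening a session per request; the first call initializes the pool lazily and the atexit hook tears it down. A minimal usage sketch, assuming the Embedding singleton picks up its models and max_concurrency from the application's config files (the 30-second timeout is illustrative):

    import asyncio
    from sycommon.llm.embedding import Embedding

    async def main():
        emb = Embedding()  # SingletonMeta: one shared instance and connection pool
        # The first request calls init_session() lazily; later requests reuse the pool
        vectors = await emb.get_embeddings(["hello", "world"], timeout=30)
        print(len(vectors), len(vectors[0]))
        await emb.close_session()  # optional; the atexit hook also closes the pool

    asyncio.run(main())
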
sycommon/llm/get_llm.py CHANGED
@@ -3,11 +3,15 @@ from langchain.chat_models import init_chat_model
 from sycommon.config.LLMConfig import LLMConfig
 from sycommon.llm.sy_langfuse import LangfuseInitializer
 from sycommon.llm.usage_token import LLMWithAutoTokenUsage
+from typing import Any


 def get_llm(
     model: str = None,
-    streaming: bool = False
+    *,
+    streaming: bool = False,
+    temperature: float = 0.1,
+    **kwargs: Any
 ) -> LLMWithAutoTokenUsage:
     if not model:
         model = "Qwen2.5-72B"
@@ -16,22 +20,25 @@ def get_llm(
     if not llmConfig:
         raise Exception(f"Invalid model configuration: {model}")

     # Initialize Langfuse
     langfuse_callbacks, langfuse = LangfuseInitializer.get()
-
     callbacks = [LLMLogger()] + langfuse_callbacks

-    llm = init_chat_model(
-        model_provider=llmConfig.provider,
-        model=llmConfig.model,
-        base_url=llmConfig.baseUrl,
-        api_key="-",
-        temperature=0.1,
-        streaming=streaming,
-        callbacks=callbacks
-    )
+    init_params = {
+        "model_provider": llmConfig.provider,
+        "model": llmConfig.model,
+        "base_url": llmConfig.baseUrl,
+        "api_key": "-",
+        "callbacks": callbacks,
+        "temperature": temperature,
+        "streaming": streaming,
+    }
+
+    init_params.update(kwargs)
+
+    llm = init_chat_model(**init_params)

     if llm is None:
         raise Exception(f"Failed to initialize the raw LLM instance: {model}")

-    return LLMWithAutoTokenUsage(llm, langfuse)
+    return LLMWithAutoTokenUsage(llm, langfuse, llmConfig)
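
get_llm now forwards arbitrary keyword arguments into init_chat_model, so call sites can tune generation parameters without touching this helper. A hypothetical call site (the max_tokens kwarg is an assumption; whether it is honored depends on the provider configured in llmConfig):

    from sycommon.llm.get_llm import get_llm

    # streaming/temperature are keyword-only after the bare `*`;
    # any extra kwargs flow through init_params.update(kwargs) into init_chat_model
    llm = get_llm("Qwen2.5-72B", streaming=True, temperature=0.0, max_tokens=2048)
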
sycommon/llm/struct_token.py CHANGED
@@ -1,21 +1,109 @@
+import tiktoken
 from typing import Dict, List, Optional, Any
 from langfuse import Langfuse, LangfuseSpan, propagate_attributes
 from sycommon.llm.llm_logger import LLMLogger
 from langchain_core.runnables import Runnable, RunnableConfig
-from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage
 from sycommon.llm.llm_tokens import TokensCallbackHandler
 from sycommon.logging.kafka_log import SYLogger
+from sycommon.config.LLMConfig import LLMConfig
 from sycommon.tools.env import get_env_var
 from sycommon.tools.merge_headers import get_header_value


 class StructuredRunnableWithToken(Runnable):
-    """Runnable with token usage accounting"""
+    """
+    Unified Runnable: trace tracking + token usage accounting + automatic context compression
+    """

-    def __init__(self, retry_chain: Runnable, langfuse: Optional[Langfuse]):
+    def __init__(
+        self,
+        retry_chain: Runnable,
+        langfuse: Optional[Langfuse] = None,
+        llmConfig: Optional[LLMConfig] = None,
+        model_name: str = "Qwen2.5-72B",
+        enable_compression: bool = True,
+        threshold_ratio: float = 0.8
+    ):
         super().__init__()
         self.retry_chain = retry_chain
         self.langfuse = langfuse
+        self.llmConfig = llmConfig
+        self.model_name = model_name
+        self.enable_compression = enable_compression
+        self.threshold_ratio = threshold_ratio
+
+        # Initialize the tokenizer
+        try:
+            self.encoding = tiktoken.encoding_for_model(model_name)
+        except KeyError:
+            self.encoding = tiktoken.get_encoding("cl100k_base")
+
+    def _count_tokens(self, messages: List[BaseMessage]) -> int:
+        """Fast token-count estimate"""
+        num_tokens = 0
+        for message in messages:
+            num_tokens += 4  # fixed overhead per message
+            # Handle content being a string, a list, or a dict
+            content = message.content
+            if isinstance(content, str):
+                num_tokens += len(self.encoding.encode(content))
+            elif isinstance(content, list):  # multimodal or complex structure
+                for item in content:
+                    if isinstance(item, dict) and "text" in item:
+                        num_tokens += len(self.encoding.encode(item["text"]))
+            elif isinstance(content, dict):
+                num_tokens += len(self.encoding.encode(str(content)))
+        return num_tokens
+
+    async def _acompress_context(self, messages: List[BaseMessage]) -> List[BaseMessage]:
+        """Run asynchronous context compression"""
+        # Strategy: keep the system prompt plus the most recent N messages; summarize the middle
+        keep_last_n = 4
+
+        # Separate system messages from conversation messages
+        system_msgs = [m for m in messages if isinstance(m, SystemMessage)]
+        conversation = [
+            m for m in messages if not isinstance(m, SystemMessage)]
+
+        if len(conversation) <= keep_last_n:
+            return messages
+
+        to_summarize = conversation[:-keep_last_n]
+        keep_recent = conversation[-keep_last_n:]
+
+        # Build the summarization prompt
+        # Note: the retry_chain itself is used for summarization; guard against infinite loops
+        summary_prompt = [
+            SystemMessage(
+                content="Summarize the context, keeping the key information and compressing it to roughly 50% of its original length."),
+            HumanMessage(content=f"History:\n{to_summarize}\n\nSummary:")
+        ]
+
+        try:
+            SYLogger.info(
+                f"🚀 Triggering compression: {len(to_summarize)} messages -> summary")
+
+            # Invoke the child chain to generate the summary
+            # [key] callbacks must be cleared, otherwise Langfuse traces recursively,
+            # causing infinite loops or noise
+            summary_result = await self.retry_chain.ainvoke(
+                {"messages": summary_prompt},
+                config=RunnableConfig(callbacks=[])
+            )
+
+            summary_text = summary_result.content if hasattr(
+                summary_result, 'content') else str(summary_result)
+
+            # Reassemble the messages: System + Summary + Recent
+            new_messages = system_msgs + \
+                [SystemMessage(
+                    content=f"[History Summary]: {summary_text}")] + keep_recent
+            return new_messages
+
+        except Exception as e:
+            SYLogger.error(
+                f"❌ Compression failed: {e}, using original context.")
+            return messages

     def _adapt_input(self, input: Any) -> List[BaseMessage]:
         """Adapt the input format"""
@@ -25,6 +113,10 @@ class StructuredRunnableWithToken(Runnable):
             return [input]
         elif isinstance(input, str):
             return [HumanMessage(content=input)]
+        elif isinstance(input, dict) and "messages" in input:
+            # Already a standard-format dict; extract directly
+            msgs = input["messages"]
+            return msgs if isinstance(msgs, list) else [msgs]
         elif isinstance(input, dict) and "input" in input:
             return [HumanMessage(content=str(input["input"]))]
         else:
@@ -40,7 +132,7 @@ class StructuredRunnableWithToken(Runnable):
         token_handler = TokensCallbackHandler()

         if config is None:
-            processed_config = {"callbacks": [], "metadata": {}}
+            processed_config = RunnableConfig(callbacks=[], metadata={})
         else:
             processed_config = config.copy()
             if "callbacks" not in processed_config:
@@ -59,6 +151,7 @@ class StructuredRunnableWithToken(Runnable):
         callbacks.append(LLMLogger())
         callbacks.append(token_handler)

+        # Deduplicate
         callback_types = {}
         unique_callbacks = []
         for cb in callbacks:
@@ -131,6 +224,8 @@ class StructuredRunnableWithToken(Runnable):
                 user_id=user_id
             )

+        # [Compression is not triggered in sync mode, because compression itself is an async LLM call]
+        # [Sync compression would need asyncio.run(...); for now the input is passed through unchanged]
         adapted_input = self._adapt_input(input)
         input_data = {"messages": adapted_input}

@@ -169,12 +264,26 @@ class StructuredRunnableWithToken(Runnable):
                 user_id=user_id
             )

+        # 1. Adapt the input
         adapted_input = self._adapt_input(input)
+
+        # 2. Check for and run context compression (async mode only, when enabled)
+        if self.enable_compression:
+            max_tokens = self.llmConfig.maxTokens
+            current_tokens = self._count_tokens(adapted_input)
+
+            if current_tokens > max_tokens * self.threshold_ratio:
+                SYLogger.warning(
+                    f"⚠️ Context limit reached: {current_tokens}/{max_tokens}")
+                # Compress and replace adapted_input
+                adapted_input = await self._acompress_context(adapted_input)
+
         input_data = {"messages": adapted_input}

         if span:
             span.update_trace(input=input_data)

+        # 3. Invoke the child chain
         structured_result = await self.retry_chain.ainvoke(
             input_data,
             config=processed_config
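
The compression gate in ainvoke is a plain threshold check over an estimated token count. A standalone sketch of the same arithmetic (the window size and message sizes are hypothetical, not taken from any real LLMConfig):

    import tiktoken

    encoding = tiktoken.get_encoding("cl100k_base")  # same fallback encoding as above
    max_tokens = 32768       # hypothetical LLMConfig.maxTokens
    threshold_ratio = 0.8    # compress once 80% of the window is used

    messages = ["system prompt", "user question", "assistant answer " * 15000]
    # Mirrors _count_tokens: fixed per-message overhead plus the encoded content
    current = sum(4 + len(encoding.encode(m)) for m in messages)
    if current > max_tokens * threshold_ratio:
        print(f"Context limit reached: {current}/{max_tokens}, summarizing older turns")
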
sycommon/llm/usage_token.py CHANGED
@@ -6,6 +6,7 @@ from langchain_core.output_parsers import PydanticOutputParser
 from langchain_core.messages import BaseMessage, HumanMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from pydantic import BaseModel, ValidationError, Field
+from sycommon.config.LLMConfig import LLMConfig
 from sycommon.llm.struct_token import StructuredRunnableWithToken


@@ -13,9 +14,10 @@ class LLMWithAutoTokenUsage(BaseChatModel):
     """LLM wrapper that automatically returns token_usage for structured calls"""
     llm: BaseChatModel = Field(default=None)
     langfuse: Optional[Langfuse] = Field(default=None, exclude=True)
+    llmConfig: Optional[LLMConfig] = Field(default=None, exclude=True)

-    def __init__(self, llm: BaseChatModel, langfuse: Langfuse, **kwargs):
-        super().__init__(llm=llm, langfuse=langfuse, **kwargs)
+    def __init__(self, llm: BaseChatModel, langfuse: Langfuse, llmConfig: LLMConfig, **kwargs):
+        super().__init__(llm=llm, langfuse=langfuse, llmConfig=llmConfig, **kwargs)

     def with_structured_output(
         self,
@@ -106,7 +108,7 @@ class LLMWithAutoTokenUsage(BaseChatModel):
             "initial": 0.1, "max": 3.0, "exp_base": 2.0, "jitter": 1.0}
         )

-        return StructuredRunnableWithToken(retry_chain, self.langfuse)
+        return StructuredRunnableWithToken(retry_chain, self.langfuse, self.llmConfig)

     # ========== Implement BaseChatModel abstract methods ==========
     def _generate(self, messages, stop=None, run_manager=None, ** kwargs):
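
Passing llmConfig through LLMWithAutoTokenUsage is what gives StructuredRunnableWithToken its maxTokens budget for compression. A hypothetical end-to-end call, assuming with_structured_output accepts a Pydantic class (as the PydanticOutputParser import suggests):

    from pydantic import BaseModel
    from sycommon.llm.get_llm import get_llm

    class Answer(BaseModel):
        verdict: str
        confidence: float

    llm = get_llm("Qwen2.5-72B")                # LLMWithAutoTokenUsage
    chain = llm.with_structured_output(Answer)  # StructuredRunnableWithToken
    # result = await chain.ainvoke("Is the sky blue?")  # async path also applies compression
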
sycommon/rabbitmq/rabbitmq_client.py CHANGED
@@ -117,112 +117,112 @@ class RabbitMQClient:
             logger.info(f"Queue rebuilt successfully: {self.queue_name}")

     async def connect(self) -> None:
+        """Connect (fixes consumption recovery not taking effect)"""
         if self._closed:
             raise RuntimeError("Client is closed; cannot reconnect")

-        # 1. Concurrency control: _connect_lock ensures only one coroutine runs the connect flow
-        async with self._connect_lock:
-            # If a connection attempt is already running, wait for it to finish
+        # 1. Acquire the Condition
+        await self._connect_condition.acquire()
+
+        try:
+            # ===== Phase A: fast checks and waiting =====
+            if await self.is_connected:
+                self._connect_condition.release()
+                return
+
             if self._connecting:
-                logger.debug("Connection already in progress, waiting for it to finish...")
                 try:
-                    # Wait on the condition variable with a 60s cap to avoid waiting forever
-                    await asyncio.wait_for(
-                        self._connect_condition.wait_for(
-                            lambda: not self._connecting),
-                        timeout=60.0
-                    )
+                    logger.debug("Connection already in progress, waiting for it to finish...")
+                    await asyncio.wait_for(self._connect_condition.wait(), timeout=60.0)
                 except asyncio.TimeoutError:
+                    self._connect_condition.release()
                     raise RuntimeError("Timed out waiting for the connection")

-            # After waiting, re-check the state
-            if not await self.is_connected:
+            if await self.is_connected:
+                self._connect_condition.release()
+                return
+            else:
+                self._connect_condition.release()
                 raise RuntimeError("Connection state still invalid after waiting for reconnect")
-            return

-            # Mark the connection as started
+            # ===== Phase B: mark connect as started =====
             self._connecting = True
+            # [key] Release the lock so other coroutines can enter the wait logic
+            self._connect_condition.release()

-            # Release _connect_lock so other coroutines can read state, while blocking new
-            # connection requests until this connect completes
-            # Note: _connect_lock is released here, but self._connecting = True blocks new connect flows
+        except Exception as e:
+            if self._connect_condition.locked():
+                self._connect_condition.release()
+            raise

+        # === Phase C: run the slow connect logic (lock released; other coroutines are not blocked) ===
         try:
-            # --- Stage 1: clean up old resources ---
-            # Re-acquire the lock for resource cleanup
-            async with self._connect_lock:
-                was_consuming = self._consumer_tag is not None
-
-                if self._channel_conn and self._conn_close_callback:
-                    try:
-                        self._channel_conn.close_callbacks.discard(
-                            self._conn_close_callback)
-                    except Exception:
-                        pass
-
-                self._channel = None
-                self._channel_conn = None
-                self._exchange = None
-                self._queue = None
-                self._conn_close_callback = None
-
-            # --- Stage 2: acquire a new connection (slow IO) ---
+            # --- Step 1: record the old state and clean up resources ---
+            # The state must be recorded before cleanup
+            was_consuming = self._consumer_tag is not None
+
+            # Clear connection callbacks so the old connection closing cannot trigger a new reconnect
+            if self._channel_conn:
+                try:
+                    if self._channel_conn.close_callbacks:
+                        self._channel_conn.close_callbacks.clear()
+                except Exception:
+                    pass
+
+            # Reset all resource state
+            self._channel = None
+            self._channel_conn = None
+            self._exchange = None
+            self._queue = None
+            self._consumer_tag = None
+
+            # --- Step 2: acquire a new connection ---
             self._channel, self._channel_conn = await self.connection_pool.acquire_channel()

-            # Set up callbacks
+            # Register the connection-closed callback
             def on_conn_closed(conn, exc):
-                logger.warning(f"Connection close detected: {exc}")
+                logger.warning(f"Underlying connection close detected: {exc}")
                 if not self._closed and not self._connecting:
                     asyncio.create_task(self._safe_reconnect())

-            self._conn_close_callback = on_conn_closed
             if self._channel_conn:
-                self._channel_conn.close_callbacks.add(
-                    self._conn_close_callback)
+                self._channel_conn.close_callbacks.add(on_conn_closed)

-            # Rebuild resources
+            # --- Step 3: rebuild base resources (exchange and queue) ---
             await self._rebuild_resources()

-            # --- Stage 3: resume consuming ---
-            if was_consuming and self._message_handler and self.queue_name and self.queue_name.endswith(f".{self.app_name}"):
-                logger.info("🔄 Detected consuming before reconnect, trying to resume automatically...")
+            # --- Step 4: resume consuming ---
+            if was_consuming and self._message_handler:
+                logger.info("🔄 Detected consuming before reconnect, trying to resume consumption automatically...")
                 try:
-                    self._queue = await self._channel.declare_queue(
-                        name=self.queue_name,
-                        durable=self.durable,
-                        auto_delete=self.auto_delete,
-                        passive=False,
-                    )
-                    await self._queue.bind(exchange=self._exchange, routing_key=self.routing_key)
-                    self._consumer_tag = await self._queue.consume(self._process_message_callback)
+                    # Resume via start_consuming, which already contains the full queue check and binding logic
+                    self._consumer_tag = await self.start_consuming()
                     logger.info(f"✅ Consumption resumed automatically: {self._consumer_tag}")
                 except Exception as e:
                     logger.error(f"❌ Failed to resume consumption automatically: {e}")
                     self._consumer_tag = None
-            else:
-                self._consumer_tag = None

             logger.info("Client connection initialized")

         except Exception as e:
             logger.error(f"Client connection failed: {str(e)}", exc_info=True)
-
-            # Clean up resources on failure
-            async with self._connect_lock:
-                if self._channel_conn and self._conn_close_callback:
-                    self._channel_conn.close_callbacks.discard(
-                        self._conn_close_callback)
-                self._channel = None
-                self._channel_conn = None
-                self._consumer_tag = None
-
+            # Clean up thoroughly on failure
+            if self._channel_conn and self._channel_conn.close_callbacks:
+                self._channel_conn.close_callbacks.clear()
+            self._channel = None
+            self._channel_conn = None
+            self._queue = None
+            self._consumer_tag = None
             raise

         finally:
-            # [key fix] notify_all must be called while holding the Condition's internal lock
-            # async with self._connect_condition: performs acquire() ... notify_all() ... release()
-            async with self._connect_condition:
+            # === Phase D: restore state and notify ===
+            await self._connect_condition.acquire()
+            try:
                 self._connecting = False
                 self._connect_condition.notify_all()
+            finally:
+                self._connect_condition.release()

     async def _safe_reconnect(self):
         """Safe reconnect task (only used to passively observe connection close)"""
@@ -258,24 +258,28 @@ class RabbitMQClient:
     async def _process_message_callback(self, message: AbstractIncomingMessage):
         try:
             msg_obj: MQMsgModel
+
+            # 1. Parse the message
             if self.auto_parse_json:
                 try:
                     body_dict = json.loads(message.body.decode("utf-8"))
                     msg_obj = MQMsgModel(**body_dict)
                 except json.JSONDecodeError as e:
                     logger.error(f"JSON parsing failed: {e}")
-                    await message.nack(requeue=False)
+                    await message.reject(requeue=False)
                     return
             else:
                 msg_obj = MQMsgModel(
                     body=message.body.decode("utf-8"),
                     routing_key=message.routing_key,
                     delivery_tag=message.delivery_tag,
-                    traceId=message.headers.get("trace-id"),
+                    traceId=message.headers.get(
+                        "trace-id") if message.headers else SYLogger.get_trace_id(),
                 )

             SYLogger.set_trace_id(msg_obj.traceId)

+            # 3. Execute the business logic
             if self._message_handler:
                 await self._message_handler(msg_obj, message)

@@ -283,28 +287,7 @@ class RabbitMQClient:

         except Exception as e:
             logger.error(f"Message handling exception: {e}", exc_info=True)
-            headers = dict(message.headers) if message.headers else {}
-            current_retry = int(headers.get("x-retry-count", 0))
-
-            if current_retry >= 3:
-                logger.warning(f"Retry limit exceeded, dropping message: {message.delivery_tag}")
-                await message.reject(requeue=False)
-            else:
-                headers["x-retry-count"] = current_retry + 1
-                try:
-                    new_msg = Message(
-                        body=message.body,
-                        headers=headers,
-                        content_type=message.content_type,
-                        delivery_mode=message.delivery_mode
-                    )
-                    # If this publish fails it triggers the reconnect mechanism;
-                    # note we are inside the consumer callback, so catch exceptions carefully
-                    await self._exchange.publish(new_msg, routing_key=message.routing_key)
-                    await message.ack()
-                except Exception as pub_err:
-                    logger.error(f"Retry publish failed: {pub_err}")
-                    await message.reject(requeue=False)
+            await message.ack()

     async def start_consuming(self) -> Optional[ConsumerTag]:
         if self._closed:
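
The rewritten connect() is essentially a single-flight guard built on asyncio.Condition: one coroutine runs the slow connect while the rest wait, and notify_all in the finally block wakes them once the state settles. A minimal standalone sketch of that pattern (independent of aio-pika; all names are hypothetical):

    import asyncio

    class SingleFlightConnector:
        """Many coroutines call ensure(); only one runs the slow connect at a time."""

        def __init__(self):
            self._cond = asyncio.Condition()
            self._connecting = False
            self.connected = False

        async def ensure(self, do_connect):
            async with self._cond:
                while self._connecting:   # someone else is connecting: wait for notify_all
                    await self._cond.wait()
                if self.connected:        # re-check the state after waking up
                    return
                self._connecting = True   # this coroutine won the race
            try:
                await do_connect()        # slow I/O runs outside the lock
                self.connected = True
            finally:
                async with self._cond:    # restore state and wake every waiter
                    self._connecting = False
                    self._cond.notify_all()
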
sycommon/synacos/nacos_client_base.py CHANGED
@@ -2,6 +2,7 @@ import threading
 import time
 from typing import Optional
 import nacos
+from sycommon.config.Config import Config
 from sycommon.logging.kafka_log import SYLogger


@@ -94,8 +95,9 @@ class NacosClientBase:

         try:
             namespace_id = self.nacos_config['namespaceId']
+            service_name = Config().config.get('Name', '')
             self.nacos_client.list_naming_instance(
-                service_name="",  # empty service name, used only to verify connectivity
+                service_name=service_name,
                 namespace_id=namespace_id,
                 group_name="DEFAULT_GROUP",
                 healthy_only=True
sycommon_python_lib-0.1.57b7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sycommon-python-lib
-Version: 0.1.57b1
+Version: 0.1.57b7
 Summary: Add your description here
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
@@ -25,6 +25,7 @@ Requires-Dist: pyyaml>=6.0.3
 Requires-Dist: sentry-sdk[fastapi]>=2.49.0
 Requires-Dist: sqlalchemy[asyncio]>=2.0.45
 Requires-Dist: starlette>=0.50.0
+Requires-Dist: tiktoken>=0.12.0
 Requires-Dist: uvicorn>=0.40.0

 # sycommon-python-lib
sycommon_python_lib-0.1.57b7.dist-info/RECORD CHANGED
@@ -19,13 +19,13 @@ sycommon/health/health_check.py,sha256=EhfbhspRpQiKJaxdtE-PzpKQO_ucaFKtQxIm16F5M
 sycommon/health/metrics.py,sha256=fHqO73JuhoZkNPR-xIlxieXiTCvttq-kG-tvxag1s1s,268
 sycommon/health/ping.py,sha256=FTlnIKk5y1mPfS1ZGOeT5IM_2udF5aqVLubEtuBp18M,250
 sycommon/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sycommon/llm/embedding.py,sha256=HknwDqXmRQcAZ8-6d8wZ6n7Bv7HtxTajDt1vvzHGeFQ,8411
-sycommon/llm/get_llm.py,sha256=C48gt9GCwEpR26M-cUjM74_t-el18ZvlwpGhcQfR3gs,1054
+sycommon/llm/embedding.py,sha256=Qi9mHQiOUVEcZd4elAaqsAyofdeLtPgU_LF35KlIFU4,14735
+sycommon/llm/get_llm.py,sha256=eZtVx9yNl-VO3O6AhZzCECRTDXRsUTcR88os8hGyJIY,1241
 sycommon/llm/llm_logger.py,sha256=n4UeNy_-g4oHQOsw-VUzF4uo3JVRLtxaMp1FcI8FiEo,5437
 sycommon/llm/llm_tokens.py,sha256=-udDyFcmyzx6UAwIi6_d_wwI5kMd5w0-WcS2soVPQxg,4309
-sycommon/llm/struct_token.py,sha256=jlpZnTOLDmRDdrCuxZe-1pQopd6OmCM9B_gWZ48CnEQ,7655
+sycommon/llm/struct_token.py,sha256=s8HQf6ZdqKRAnanAo8yPwAM-Ez4P9gGlhI_zBEffphM,12301
 sycommon/llm/sy_langfuse.py,sha256=NZv6ydfn3-cxqQvuB5WdnM9GYliO9qB_RWh_XqIS3VU,3692
-sycommon/llm/usage_token.py,sha256=n0hytuaHI4tJi6wuOS3bd-yWzQjZ-lx5w9egHs8uYgg,5140
+sycommon/llm/usage_token.py,sha256=CDoA_UeZKpNvxH0vNZ8f58tfLV3wC4kd5e1Oferyy9s,5318
 sycommon/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sycommon/logging/async_sql_logger.py,sha256=_OY36XkUm__U3NhMgiecy-qd-nptZ_0gpE3J8lGAr58,2619
 sycommon/logging/kafka_log.py,sha256=gfOqdZe0HJ3PkIFfnNWG4DZVadxsCKJ6AmelR7_Z1Xs,9960
@@ -51,7 +51,7 @@ sycommon/models/mqsend_config.py,sha256=NQX9dc8PpuquMG36GCVhJe8omAW1KVXXqr6lSRU6
 sycommon/models/sso_user.py,sha256=i1WAN6k5sPcPApQEdtjpWDy7VrzWLpOrOQewGLGoGIw,2702
 sycommon/notice/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sycommon/notice/uvicorn_monitor.py,sha256=VryQYcAtjijJuGDBimbVurgwxlsLaLtkNnABPDY5Tao,7332
-sycommon/rabbitmq/rabbitmq_client.py,sha256=hAbLOioU_clucJ9xq88Oo-waZOuU0ii4yBVGIjz1nBE,17992
+sycommon/rabbitmq/rabbitmq_client.py,sha256=pAhyLfuHl72szLBr-nRqv-QKseT9x_QCR6Mu-IonP1U,16689
 sycommon/rabbitmq/rabbitmq_pool.py,sha256=BiFQgZPzSAFR-n5XhyIafoeWQXETF_31nFRDhMbe6aU,15577
 sycommon/rabbitmq/rabbitmq_service.py,sha256=XSHo9HuIJ_lq-vizRh4xJVdZr_2zLqeLhot09qb0euA,2025
 sycommon/rabbitmq/rabbitmq_service_client_manager.py,sha256=IP9TMFeG5LSrwFPEmOy1ce4baPxBUZnWJZR3nN_-XR4,8009
@@ -69,7 +69,7 @@ sycommon/synacos/example.py,sha256=61XL03tU8WTNOo3FUduf93F2fAwah1S0lbH1ufhRhRk,5
 sycommon/synacos/example2.py,sha256=adUaru3Hy482KrOA17DfaC4nwvLj8etIDS_KrWLWmCU,4811
 sycommon/synacos/feign.py,sha256=frB3D5LeFDtT3pJLFOwFzEOrNAJKeQNGk-BzUg9T3WM,8295
 sycommon/synacos/feign_client.py,sha256=ExO7Pd5B3eFKDjXqBRc260K1jkI49IYguLwJJaD2R-o,16166
-sycommon/synacos/nacos_client_base.py,sha256=l5jpall6nEt0Hy07Wk-PVU0VN0BmD_Mmtldmtyvvksg,4526
+sycommon/synacos/nacos_client_base.py,sha256=KZgQAg9Imfr_TfM-4LXdtrnTdJ-beu6bcNJa0c2HauE,4600
 sycommon/synacos/nacos_config_manager.py,sha256=Cff-4gpp0aD7sQVi-nEvDO4BWqK9abEDDDJ9qXKFQgs,4399
 sycommon/synacos/nacos_heartbeat_manager.py,sha256=G80_pOn37WdO_HpYUiAfpwMqAxW0ff0Bnw0NEuge9v0,5568
 sycommon/synacos/nacos_service.py,sha256=BezQ1eDIYwBPE567Po_Qh1Ki_z9WmhZy1J1NiTPbdHY,6118
@@ -82,8 +82,8 @@ sycommon/tools/env.py,sha256=Ah-tBwG2C0_hwLGFebVQgKdWWXCjTzBuF23gCkLHYy4,2437
 sycommon/tools/merge_headers.py,sha256=u9u8_1ZIuGIminWsw45YJ5qnsx9MB-Fot0VPge7itPw,4941
 sycommon/tools/snowflake.py,sha256=xQlYXwYnI85kSJ1rZ89gMVBhzemP03xrMPVX9vVa3MY,9228
 sycommon/tools/timing.py,sha256=OiiE7P07lRoMzX9kzb8sZU9cDb0zNnqIlY5pWqHcnkY,2064
-sycommon_python_lib-0.1.57b1.dist-info/METADATA,sha256=SSwWUy9hRgJhiqL1ulY91dBhEegshkvsXXLTKxNnXTc,7301
-sycommon_python_lib-0.1.57b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sycommon_python_lib-0.1.57b1.dist-info/entry_points.txt,sha256=q_h2nbvhhmdnsOUZEIwpuoDjaNfBF9XqppDEmQn9d_A,46
-sycommon_python_lib-0.1.57b1.dist-info/top_level.txt,sha256=98CJ-cyM2WIKxLz-Pf0AitWLhJyrfXvyY8slwjTXNuc,17
-sycommon_python_lib-0.1.57b1.dist-info/RECORD,,
+sycommon_python_lib-0.1.57b7.dist-info/METADATA,sha256=e43DQ_eU9kbXuhiZpJF4ys1betxau0wepQuOm6v4l1c,7333
+sycommon_python_lib-0.1.57b7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sycommon_python_lib-0.1.57b7.dist-info/entry_points.txt,sha256=q_h2nbvhhmdnsOUZEIwpuoDjaNfBF9XqppDEmQn9d_A,46
+sycommon_python_lib-0.1.57b7.dist-info/top_level.txt,sha256=98CJ-cyM2WIKxLz-Pf0AitWLhJyrfXvyY8slwjTXNuc,17
+sycommon_python_lib-0.1.57b7.dist-info/RECORD,,