sycommon-python-lib 0.1.55b1__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sycommon/config/Config.py +29 -4
- sycommon/config/LangfuseConfig.py +15 -0
- sycommon/config/RerankerConfig.py +1 -0
- sycommon/config/SentryConfig.py +13 -0
- sycommon/llm/__init__.py +0 -0
- sycommon/llm/embedding.py +204 -0
- sycommon/llm/get_llm.py +37 -0
- sycommon/llm/llm_logger.py +126 -0
- sycommon/llm/llm_tokens.py +119 -0
- sycommon/llm/struct_token.py +192 -0
- sycommon/llm/sy_langfuse.py +103 -0
- sycommon/llm/usage_token.py +117 -0
- sycommon/logging/kafka_log.py +200 -434
- sycommon/logging/logger_levels.py +23 -0
- sycommon/middleware/context.py +2 -0
- sycommon/middleware/exception.py +10 -16
- sycommon/middleware/timeout.py +2 -1
- sycommon/middleware/traceid.py +174 -48
- sycommon/notice/__init__.py +0 -0
- sycommon/notice/uvicorn_monitor.py +200 -0
- sycommon/rabbitmq/rabbitmq_client.py +232 -242
- sycommon/rabbitmq/rabbitmq_pool.py +278 -218
- sycommon/rabbitmq/rabbitmq_service.py +25 -843
- sycommon/rabbitmq/rabbitmq_service_client_manager.py +206 -0
- sycommon/rabbitmq/rabbitmq_service_connection_monitor.py +73 -0
- sycommon/rabbitmq/rabbitmq_service_consumer_manager.py +285 -0
- sycommon/rabbitmq/rabbitmq_service_core.py +117 -0
- sycommon/rabbitmq/rabbitmq_service_producer_manager.py +238 -0
- sycommon/sentry/__init__.py +0 -0
- sycommon/sentry/sy_sentry.py +35 -0
- sycommon/services.py +124 -96
- sycommon/synacos/feign.py +8 -3
- sycommon/synacos/feign_client.py +22 -8
- sycommon/synacos/nacos_client_base.py +119 -0
- sycommon/synacos/nacos_config_manager.py +107 -0
- sycommon/synacos/nacos_heartbeat_manager.py +144 -0
- sycommon/synacos/nacos_service.py +64 -771
- sycommon/synacos/nacos_service_discovery.py +157 -0
- sycommon/synacos/nacos_service_registration.py +270 -0
- sycommon/tools/env.py +62 -0
- sycommon/tools/merge_headers.py +117 -0
- sycommon/tools/snowflake.py +101 -153
- {sycommon_python_lib-0.1.55b1.dist-info → sycommon_python_lib-0.1.56.dist-info}/METADATA +11 -5
- sycommon_python_lib-0.1.56.dist-info/RECORD +89 -0
- sycommon_python_lib-0.1.55b1.dist-info/RECORD +0 -62
- {sycommon_python_lib-0.1.55b1.dist-info → sycommon_python_lib-0.1.56.dist-info}/WHEEL +0 -0
- {sycommon_python_lib-0.1.55b1.dist-info → sycommon_python_lib-0.1.56.dist-info}/entry_points.txt +0 -0
- {sycommon_python_lib-0.1.55b1.dist-info → sycommon_python_lib-0.1.56.dist-info}/top_level.txt +0 -0
sycommon/config/Config.py
CHANGED
@@ -15,14 +15,13 @@ class Config(metaclass=SingletonMeta):
         with open(config_file, 'r', encoding='utf-8') as f:
             self.config = yaml.safe_load(f)
         self.MaxBytes = self.config.get('MaxBytes', 209715200)
-        self.Timeout = self.config.get('Timeout',
-        self.OCR = self.config.get('OCR', None)
-        self.INVOICE_OCR = self.config.get('INVOICE_OCR', None)
-        self.UnstructuredAPI = self.config.get('UnstructuredAPI', None)
+        self.Timeout = self.config.get('Timeout', 600000)
         self.MaxRetries = self.config.get('MaxRetries', 3)
         self.llm_configs = []
         self.embedding_configs = []
         self.reranker_configs = []
+        self.sentry_configs = []
+        self.langfuse_configs = []
         self._process_config()
 
     def get_llm_config(self, model_name):
@@ -43,6 +42,18 @@ class Config(metaclass=SingletonMeta):
                 return llm
         raise ValueError(f"No configuration found for model: {model_name}")
 
+    def get_sentry_config(self, name):
+        for sentry in self.sentry_configs:
+            if sentry.get('name') == name:
+                return sentry
+        raise ValueError(f"No configuration found for server: {name}")
+
+    def get_langfuse_config(self, name):
+        for langfuse in self.langfuse_configs:
+            if langfuse.get('name') == name:
+                return langfuse
+        raise ValueError(f"No configuration found for server: {name}")
+
     def _process_config(self):
         llm_config_list = self.config.get('LLMConfig', [])
         for llm_config in llm_config_list:
@@ -71,3 +82,17 @@ class Config(metaclass=SingletonMeta):
                 self.reranker_configs.append(validated_config.model_dump())
             except ValueError as e:
                 print(f"Invalid LLM configuration: {e}")
+
+        sentry_config_list = self.config.get('SentryConfig', [])
+        for sentry_config in sentry_config_list:
+            try:
+                from sycommon.config.SentryConfig import SentryConfig
+                validated_config = SentryConfig(**sentry_config)
+                self.sentry_configs.append(validated_config.model_dump())
+            except ValueError as e:
+                print(f"Invalid Sentry configuration: {e}")
+
+    def set_attr(self, share_configs: dict):
+        self.config = {**self.config, **
+                       share_configs.get('llm', {}), **share_configs}
+        self._process_config()
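For orientation, a minimal usage sketch of the new lookup helpers and set_attr; the service name, YAML keys, and values below are placeholders, not taken from this diff:

# Hedged usage sketch; names and values are placeholders.
from sycommon.config.Config import Config

config = Config()  # singleton; parses the YAML config on first construction
sentry = config.get_sentry_config("my-service")      # validated dict, or ValueError
langfuse = config.get_langfuse_config("my-service")  # validated dict, or ValueError

# set_attr merges externally shared config into self.config and re-runs
# _process_config, so newly shared sections are picked up.
config.set_attr({"SentryConfig": [
    {"name": "my-service", "dsn": "https://example@sentry.invalid/1", "enable": True},
]})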
sycommon/config/LangfuseConfig.py
ADDED
@@ -0,0 +1,15 @@
+from pydantic import BaseModel
+
+
+class LangfuseConfig(BaseModel):
+    name: str
+    secretKey: str
+    publicKey: str
+    baseUrl: str
+    enable: bool
+
+    @classmethod
+    def from_config(cls, server_name: str):
+        from sycommon.config.Config import Config
+        langfuse_config = Config().get_langfuse_config(server_name)
+        return cls(**langfuse_config)
sycommon/config/SentryConfig.py
ADDED
@@ -0,0 +1,13 @@
+from pydantic import BaseModel
+
+
+class SentryConfig(BaseModel):
+    name: str
+    dsn: str
+    enable: bool
+
+    @classmethod
+    def from_config(cls, server_name: str):
+        from sycommon.config.Config import Config
+        sentry_config = Config().get_sentry_config(server_name)
+        return cls(**sentry_config)
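Both new pydantic models resolve themselves through the Config singleton, so a caller can go from a configured name straight to a validated object. A hedged sketch (the server name is a placeholder):

from sycommon.config.SentryConfig import SentryConfig
from sycommon.config.LangfuseConfig import LangfuseConfig

# from_config looks the name up via Config().get_sentry_config /
# get_langfuse_config and re-validates the dict through pydantic;
# an unknown name raises ValueError.
sentry_cfg = SentryConfig.from_config("my-service")
if sentry_cfg.enable:
    print(sentry_cfg.dsn)

langfuse_cfg = LangfuseConfig.from_config("my-service")
print(langfuse_cfg.baseUrl, langfuse_cfg.publicKey)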
sycommon/llm/__init__.py
ADDED
File without changes
sycommon/llm/embedding.py
ADDED
@@ -0,0 +1,204 @@
+import asyncio
+import json
+import aiohttp
+from typing import Union, List, Optional
+
+from sycommon.config.Config import SingletonMeta
+from sycommon.config.EmbeddingConfig import EmbeddingConfig
+from sycommon.config.RerankerConfig import RerankerConfig
+from sycommon.logging.kafka_log import SYLogger
+
+
+class Embedding(metaclass=SingletonMeta):
+    def __init__(self):
+        # 1. Concurrency limit
+        self.max_concurrency = 20
+        # Keep the default model names
+        self.default_embedding_model = "bge-large-zh-v1.5"
+        self.default_reranker_model = "bge-reranker-large"
+
+        # Initialize the base URLs for the default models
+        self.embeddings_base_url = EmbeddingConfig.from_config(
+            self.default_embedding_model).baseUrl
+        self.reranker_base_url = RerankerConfig.from_config(
+            self.default_reranker_model).baseUrl
+
+        # Concurrency semaphore
+        self.semaphore = asyncio.Semaphore(self.max_concurrency)
+        # Global default timeout: never time out (None)
+        self.default_timeout = aiohttp.ClientTimeout(total=None)
+
+    async def _get_embeddings_http_async(
+        self,
+        input: Union[str, List[str]],
+        encoding_format: str = None,
+        model: str = None,
+        timeout: aiohttp.ClientTimeout = None,
+        **kwargs
+    ):
+        async with self.semaphore:
+            # Prefer the caller-supplied timeout; otherwise use the global default
+            request_timeout = timeout or self.default_timeout
+
+            # Prefer the caller-supplied model name; otherwise use the default
+            target_model = model or self.default_embedding_model
+            target_base_url = EmbeddingConfig.from_config(target_model).baseUrl
+            url = f"{target_base_url}/v1/embeddings"
+
+            request_body = {
+                "model": target_model,
+                "input": input,
+                "encoding_format": encoding_format or "float"
+            }
+            request_body.update(kwargs)
+
+            try:
+                async with aiohttp.ClientSession(timeout=request_timeout) as session:
+                    async with session.post(url, json=request_body) as response:
+                        if response.status != 200:
+                            error_detail = await response.text()
+                            SYLogger.error(
+                                f"Embedding request failed (model: {target_model}): {error_detail}")
+                            return None
+                        return await response.json()
+            except asyncio.TimeoutError:
+                SYLogger.error(
+                    f"Embedding request timeout (model: {target_model})")
+                return None
+            except Exception as e:
+                SYLogger.error(
+                    f"Embedding request unexpected error (model: {target_model}): {str(e)}")
+                return None
+
+    async def _get_reranker_http_async(
+        self,
+        documents: List[str],
+        query: str,
+        top_n: Optional[int] = None,
+        model: str = None,
+        max_chunks_per_doc: Optional[int] = None,
+        return_documents: Optional[bool] = True,
+        return_len: Optional[bool] = True,
+        timeout: aiohttp.ClientTimeout = None,
+        **kwargs
+    ):
+        async with self.semaphore:
+            # Prefer the caller-supplied timeout; otherwise use the global default
+            request_timeout = timeout or self.default_timeout
+
+            # Prefer the caller-supplied model name; otherwise use the default
+            target_model = model or self.default_reranker_model
+            target_base_url = RerankerConfig.from_config(target_model).baseUrl
+            url = f"{target_base_url}/v1/rerank"
+
+            request_body = {
+                "model": target_model,
+                "documents": documents,
+                "query": query,
+                "top_n": top_n or len(documents),
+                "max_chunks_per_doc": max_chunks_per_doc,
+                "return_documents": return_documents,
+                "return_len": return_len,
+                "kwargs": json.dumps(kwargs),
+            }
+            request_body.update(kwargs)
+
+            try:
+                async with aiohttp.ClientSession(timeout=request_timeout) as session:
+                    async with session.post(url, json=request_body) as response:
+                        if response.status != 200:
+                            error_detail = await response.text()
+                            SYLogger.error(
+                                f"Rerank request failed (model: {target_model}): {error_detail}")
+                            return None
+                        return await response.json()
+            except asyncio.TimeoutError:
+                SYLogger.error(
+                    f"Rerank request timeout (model: {target_model})")
+                return None
+            except Exception as e:
+                SYLogger.error(
+                    f"Rerank request unexpected error (model: {target_model}): {str(e)}")
+                return None
+
+    async def get_embeddings(
+        self,
+        corpus: List[str],
+        model: str = None,
+        timeout: Optional[Union[int, float]] = None
+    ):
+        """
+        Get embedding vectors for a corpus; results keep the same order as the input corpus.
+
+        Args:
+            corpus: list of texts to embed
+            model: optional embedding model name; defaults to bge-large-zh-v1.5
+            timeout: optional timeout in seconds:
+                - int/float: total timeout in seconds
+                - omitted/None: use the default no-timeout configuration
+        """
+        request_timeout = None
+        if timeout is not None:
+            if isinstance(timeout, (int, float)):
+                request_timeout = aiohttp.ClientTimeout(total=timeout)
+            else:
+                SYLogger.warning(
+                    f"Invalid timeout type: {type(timeout)}, must be int/float, use default timeout")
+
+        SYLogger.info(
+            f"Requesting embeddings for corpus: {corpus} (model: {model or self.default_embedding_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
+
+        # Pass the model name and timeout configuration to each async task
+        tasks = [self._get_embeddings_http_async(
+            text, model=model, timeout=request_timeout) for text in corpus]
+        results = await asyncio.gather(*tasks)
+
+        vectors = []
+        for result in results:
+            if result is None:
+                zero_vector = [0.0] * 1024
+                vectors.append(zero_vector)
+                SYLogger.warning(
+                    f"Embedding request failed, append zero vector (1024D)")
+                continue
+            for item in result["data"]:
+                vectors.append(item["embedding"])
+
+        SYLogger.info(
+            f"Embeddings for corpus: {corpus} created (model: {model or self.default_embedding_model})")
+        return vectors
+
+    async def get_reranker(
+        self,
+        top_results: List[str],
+        query: str,
+        model: str = None,
+        timeout: Optional[Union[int, float]] = None
+    ):
+        """
+        Rerank search results.
+
+        Args:
+            top_results: list of texts to rerank
+            query: the query used as the ranking reference
+            model: optional reranker model name; defaults to bge-reranker-large
+            timeout: optional timeout in seconds:
+                - int/float: total timeout in seconds
+                - omitted/None: use the default no-timeout configuration
+        """
+        request_timeout = None
+        if timeout is not None:
+            if isinstance(timeout, (int, float)):
+                request_timeout = aiohttp.ClientTimeout(total=timeout)
+            else:
+                SYLogger.warning(
+                    f"Invalid timeout type: {type(timeout)}, must be int/float, use default timeout")
+
+        SYLogger.info(
+            f"Requesting reranker for top_results: {top_results} (model: {model or self.default_reranker_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
+
+        data = await self._get_reranker_http_async(
+            top_results, query, model=model, timeout=request_timeout)
+        SYLogger.info(
+            f"Reranker for top_results: {top_results} completed (model: {model or self.default_reranker_model})")
+        return data
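The embedding and rerank helpers are coroutines on a process-wide Embedding singleton; failed embedding requests degrade to 1024-dimensional zero vectors instead of raising. A minimal usage sketch, assuming EmbeddingConfig and RerankerConfig (not shown in this section) resolve the default model endpoints:

import asyncio
from sycommon.llm.embedding import Embedding

async def main():
    emb = Embedding()  # SingletonMeta: constructed once per process

    # One HTTP request per text, throttled by the internal semaphore (20 concurrent).
    vectors = await emb.get_embeddings(["first passage", "second passage"], timeout=30)
    print(len(vectors), len(vectors[0]))

    # Rerank candidates against a query; returns the raw /v1/rerank JSON response.
    ranked = await emb.get_reranker(["doc a", "doc b"], query="which doc?", timeout=30)
    print(ranked)

asyncio.run(main())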
sycommon/llm/get_llm.py
ADDED
@@ -0,0 +1,37 @@
+from sycommon.llm.llm_logger import LLMLogger
+from langchain.chat_models import init_chat_model
+from sycommon.config.LLMConfig import LLMConfig
+from sycommon.llm.sy_langfuse import LangfuseInitializer
+from sycommon.llm.usage_token import LLMWithAutoTokenUsage
+
+
+def get_llm(
+    model: str = None,
+    streaming: bool = False
+) -> LLMWithAutoTokenUsage:
+    if not model:
+        model = "Qwen2.5-72B"
+
+    llmConfig = LLMConfig.from_config(model)
+    if not llmConfig:
+        raise Exception(f"Invalid model configuration: {model}")
+
+    # Initialize Langfuse
+    langfuse_callbacks, langfuse = LangfuseInitializer.get()
+
+    callbacks = [LLMLogger()] + langfuse_callbacks
+
+    llm = init_chat_model(
+        model_provider=llmConfig.provider,
+        model=llmConfig.model,
+        base_url=llmConfig.baseUrl,
+        api_key="-",
+        temperature=0.1,
+        streaming=streaming,
+        callbacks=callbacks
+    )
+
+    if llm is None:
+        raise Exception(f"Failed to initialize the raw LLM instance: {model}")
+
+    return LLMWithAutoTokenUsage(llm, langfuse)
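get_llm wires LLMLogger plus any Langfuse callbacks into init_chat_model and returns the model wrapped in LLMWithAutoTokenUsage (defined in usage_token.py, which is not part of this section). A hedged sketch of the intended call pattern, on the assumption that the wrapper forwards the usual LangChain invoke surface:

from sycommon.llm.get_llm import get_llm

# Falls back to the "Qwen2.5-72B" entry in LLMConfig when no model is given.
llm = get_llm(model="Qwen2.5-72B", streaming=False)

# Assumption: LLMWithAutoTokenUsage proxies invoke/ainvoke to the wrapped chat model.
response = llm.invoke("Summarize this diff in one sentence.")
print(response)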
sycommon/llm/llm_logger.py
ADDED
@@ -0,0 +1,126 @@
+from langchain_core.callbacks import AsyncCallbackHandler
+from typing import Any, Dict, List
+from langchain_core.outputs import GenerationChunk, ChatGeneration
+from langchain_core.messages import BaseMessage
+
+from sycommon.logging.kafka_log import SYLogger
+
+
+class LLMLogger(AsyncCallbackHandler):
+    """
+    General-purpose LLM logging callback handler that supports:
+    - synchronous calls (e.g. chain.invoke())
+    - asynchronous calls (e.g. chain.astream())
+    - chat model calls
+    """
+
+    # ------------------------------
+    # Synchronous callbacks (handle invoke and other sync calls)
+    # ------------------------------
+    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
+        model_name = serialized.get('name', 'unknown')
+        SYLogger.info(
+            f"[sync] LLM call started | model: {model_name} | prompts: {len(prompts)}")
+        self._log_prompts(prompts)
+
+    def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        **kwargs: Any
+    ) -> None:
+        model_name = serialized.get('name', 'unknown')
+        SYLogger.info(
+            f"[sync] chat model call started | model: {model_name} | message groups: {len(messages)}")
+        self._log_chat_messages(messages)
+
+    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
+        # Handle plain LLM results
+        if hasattr(response, 'generations') and all(
+            isinstance(gen[0], GenerationChunk) for gen in response.generations
+        ):
+            for i, generation in enumerate(response.generations):
+                result = generation[0].text
+                SYLogger.info(
+                    f"[sync] LLM call finished | result #{i+1} length: {len(result)}")
+                self._log_result(result, i+1)
+        # Handle chat model results
+        elif hasattr(response, 'generations') and all(
+            isinstance(gen[0], ChatGeneration) for gen in response.generations
+        ):
+            for i, generation in enumerate(response.generations):
+                result = generation[0].message.content
+                SYLogger.info(
+                    f"[sync] chat model call finished | result #{i+1} length: {len(result)}")
+                self._log_result(result, i+1)
+
+    def on_llm_error(self, error: Exception, **kwargs: Any) -> None:
+        if isinstance(error, GeneratorExit):
+            SYLogger.info("[sync] LLM generator closed normally")
+            return
+        SYLogger.error(f"[sync] LLM call failed: {str(error)}")
+
+    # ------------------------------
+    # Asynchronous callbacks (handle astream and other async calls)
+    # ------------------------------
+    async def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
+        model_name = serialized.get('name', 'unknown')
+        SYLogger.info(
+            f"[async] LLM call started | model: {model_name} | prompts: {len(prompts)}")
+        self._log_prompts(prompts)
+
+    async def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        **kwargs: Any
+    ) -> None:
+        model_name = serialized.get('name', 'unknown')
+        SYLogger.info(
+            f"[async] chat model call started | model: {model_name} | message groups: {len(messages)}")
+        self._log_chat_messages(messages)
+
+    async def on_llm_end(self, response: Any, **kwargs: Any) -> None:
+        # Handle plain LLM results
+        if hasattr(response, 'generations') and all(
+            isinstance(gen[0], GenerationChunk) for gen in response.generations
+        ):
+            for i, generation in enumerate(response.generations):
+                result = generation[0].text
+                SYLogger.info(
+                    f"[async] LLM call finished | result #{i+1} length: {len(result)}")
+                self._log_result(result, i+1)
+        # Handle chat model results
+        elif hasattr(response, 'generations') and all(
+            isinstance(gen[0], ChatGeneration) for gen in response.generations
+        ):
+            for i, generation in enumerate(response.generations):
+                result = generation[0].message.content
+                SYLogger.info(
+                    f"[async] chat model call finished | result #{i+1} length: {len(result)}")
+                self._log_result(result, i+1)
+
+    async def on_llm_error(self, error: Exception, **kwargs: Any) -> None:
+        if isinstance(error, GeneratorExit):
+            SYLogger.info("[async] LLM generator closed normally")
+            return
+        SYLogger.error(f"[async] LLM call failed: {str(error)}")
+
+    # ------------------------------
+    # Shared helpers (avoid duplication)
+    # ------------------------------
+    def _log_prompts(self, prompts: List[str]) -> None:
+        """Log the prompts"""
+        for i, prompt in enumerate(prompts):
+            SYLogger.info(f"Prompt #{i+1}:\n{prompt}")
+
+    def _log_chat_messages(self, messages: List[List[BaseMessage]]) -> None:
+        """Log chat model messages"""
+        for i, message_group in enumerate(messages):
+            SYLogger.info(f"Message group #{i+1}:")
+            for msg in message_group:
+                SYLogger.info(f"  {msg.type}: {msg.content}")
+
+    def _log_result(self, result: str, index: int) -> None:
+        """Log a result"""
+        SYLogger.info(f"Result #{index}:\n{result}")
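Since LLMLogger is a standard LangChain callback handler, it can also be attached per call, outside of get_llm. A small sketch, assuming an OpenAI-compatible model is available (the provider and model name are placeholders):

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage
from sycommon.llm.llm_logger import LLMLogger

# Placeholder provider/model; any LangChain chat model accepts callbacks this way.
chat_model = init_chat_model(model_provider="openai", model="gpt-4o-mini")
result = chat_model.invoke(
    [HumanMessage(content="ping")],
    config={"callbacks": [LLMLogger()]},  # prompts and generations go to SYLogger
)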
sycommon/llm/llm_tokens.py
ADDED
@@ -0,0 +1,119 @@
+from typing import Any
+from langchain_core.callbacks import AsyncCallbackHandler
+from langchain_core.outputs.llm_result import LLMResult
+from sycommon.logging.kafka_log import SYLogger
+
+
+class TokensCallbackHandler(AsyncCallbackHandler):
+    """
+    Token-usage accounting handler built on AsyncCallbackHandler
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.input_tokens = 0
+        self.output_tokens = 0
+        self.total_tokens = 0
+        self.usage_metadata = {}
+        self.reset()
+
+    def reset(self):
+        """Reset the token statistics"""
+        self.input_tokens = 0
+        self.output_tokens = 0
+        self.total_tokens = 0
+        self.usage_metadata = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "total_tokens": 0
+        }
+
+    # ========== Synchronous callback (signature-compatible) ==========
+    def on_llm_end(
+        self,
+        response: LLMResult,
+        **kwargs: Any,
+    ) -> None:
+        """Callback invoked when a synchronous LLM call finishes"""
+        self._parse_token_usage(response)
+
+    # ========== Asynchronous callback (signature-compatible) ==========
+    async def on_llm_end(
+        self,
+        response: LLMResult,
+        **kwargs: Any,
+    ) -> None:
+        """Callback invoked when an asynchronous LLM call finishes"""
+        self._parse_token_usage(response)
+
+    def _parse_token_usage(self, response: LLMResult) -> None:
+        """
+        Generic token parsing that does not depend on a specific class structure
+        and is compatible with a variety of LLM response formats
+        """
+        try:
+            # Case 1: standard LangChain response (has an llm_output attribute)
+            if response.llm_output:
+                llm_output = response.llm_output
+                self._parse_from_llm_output(llm_output)
+
+            # Case 2: response carrying generations
+            elif response.generations:
+                self._parse_from_generations(response.generations)
+
+            # Compute the total token count
+            if self.total_tokens <= 0:
+                self.total_tokens = self.input_tokens + self.output_tokens
+
+            # Update the metadata
+            self.usage_metadata = {
+                "input_tokens": self.input_tokens,
+                "output_tokens": self.output_tokens,
+                "total_tokens": self.total_tokens
+            }
+
+            SYLogger.debug(
+                f"Token accounting succeeded - input: {self.input_tokens}, output: {self.output_tokens}")
+
+        except Exception as e:
+            SYLogger.warning(f"Token parsing failed: {str(e)}", exc_info=True)
+            self.reset()
+
+    def _parse_from_llm_output(self, llm_output: dict) -> None:
+        """Parse token info from the llm_output dict"""
+        if not isinstance(llm_output, dict):
+            return
+
+        # Standard OpenAI format
+        if 'token_usage' in llm_output:
+            token_usage = llm_output['token_usage']
+            self.input_tokens = token_usage.get(
+                'prompt_tokens', token_usage.get('input_tokens', 0))
+            self.output_tokens = token_usage.get(
+                'completion_tokens', token_usage.get('output_tokens', 0))
+            self.total_tokens = token_usage.get('total_tokens', 0)
+
+        # Token info stored directly on llm_output
+        else:
+            self.input_tokens = llm_output.get(
+                'input_tokens', llm_output.get('prompt_tokens', 0))
+            self.output_tokens = llm_output.get(
+                'output_tokens', llm_output.get('completion_tokens', 0))
+            self.total_tokens = llm_output.get('total_tokens', 0)
+
+    def _parse_from_generations(self, generations: list) -> None:
+        """Parse token info from the generations list"""
+        if not isinstance(generations, list) or len(generations) == 0:
+            return
+
+        # Walk the generation info
+        for gen_group in generations:
+            for generation in gen_group:
+                if hasattr(generation, 'generation_info') and generation.generation_info:
+                    gen_info = generation.generation_info
+                    self.input_tokens = gen_info.get(
+                        'input_tokens', gen_info.get('prompt_tokens', 0))
+                    self.output_tokens = gen_info.get(
+                        'output_tokens', gen_info.get('completion_tokens', 0))
+                    self.total_tokens = gen_info.get('total_tokens', 0)
+                    return
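TokensCallbackHandler keeps its counts on the handler instance, so a typical pattern is to reset() before a request and read usage_metadata after it completes. A hedged sketch (placeholder provider and model, same caveat as above):

from langchain.chat_models import init_chat_model
from sycommon.llm.llm_tokens import TokensCallbackHandler

token_handler = TokensCallbackHandler()
chat_model = init_chat_model(model_provider="openai", model="gpt-4o-mini")

token_handler.reset()
chat_model.invoke("How many tokens is this?",
                  config={"callbacks": [token_handler]})

# Populated by on_llm_end via _parse_token_usage.
print(token_handler.usage_metadata)
# e.g. {"input_tokens": ..., "output_tokens": ..., "total_tokens": ...}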