sycommon-python-lib 0.2.2a18__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sycommon/agent/deep_agent.py +58 -8
- sycommon/agent/multi_agent_team.py +8 -3
- sycommon/agent/sandbox/file_ops.py +11 -3
- sycommon/agent/summarization_utils.py +133 -9
- sycommon/config/LLMConfig.py +1 -0
- sycommon/llm/get_llm.py +6 -2
- sycommon/llm/llm_with_token_tracking.py +17 -0
- sycommon/llm/struct_token.py +6 -8
- sycommon/llm/sy_langfuse.py +21 -6
- sycommon/middleware/tool_result_truncation.py +192 -0
- sycommon/models/mqlistener_config.py +1 -1
- sycommon/notice/__init__.py +5 -0
- sycommon/notice/wecom_message.py +328 -0
- sycommon/rabbitmq/rabbitmq_client.py +56 -8
- sycommon/tests/test_real_summarization.py +166 -0
- sycommon/tests/test_summarization_config.py +463 -0
- sycommon/tests/test_summarization_real.py +329 -0
- {sycommon_python_lib-0.2.2a18.dist-info → sycommon_python_lib-0.2.3.dist-info}/METADATA +10 -10
- {sycommon_python_lib-0.2.2a18.dist-info → sycommon_python_lib-0.2.3.dist-info}/RECORD +22 -17
- {sycommon_python_lib-0.2.2a18.dist-info → sycommon_python_lib-0.2.3.dist-info}/WHEEL +0 -0
- {sycommon_python_lib-0.2.2a18.dist-info → sycommon_python_lib-0.2.3.dist-info}/entry_points.txt +0 -0
- {sycommon_python_lib-0.2.2a18.dist-info → sycommon_python_lib-0.2.3.dist-info}/top_level.txt +0 -0
sycommon/agent/deep_agent.py
CHANGED
|
@@ -51,6 +51,7 @@ from sycommon.agent.sandbox.sandbox_recovery import SandboxRecoveryManager
|
|
|
51
51
|
from sycommon.agent.chat_events import ChatEvent, ChatEventBuilder, DEFAULT_AGENT_NAME
|
|
52
52
|
from sycommon.middleware.background_execution import BackgroundExecutionMiddleware
|
|
53
53
|
from sycommon.middleware.token_tracking import TokenTrackingMiddleware
|
|
54
|
+
from sycommon.middleware.tool_result_truncation import ToolResultTruncationMiddleware
|
|
54
55
|
from deepagents.middleware.summarization import create_summarization_tool_middleware # noqa: F401 保留 re-export
|
|
55
56
|
from sycommon.agent.summarization_utils import build_summarization_middleware
|
|
56
57
|
|
|
@@ -107,7 +108,7 @@ class AgentConfig(BaseModel):
|
|
|
107
108
|
|
|
108
109
|
# 沙箱配置
|
|
109
110
|
sandbox_service_name: str = "shengye-platform-sandbox"
|
|
110
|
-
sandbox_timeout: int =
|
|
111
|
+
sandbox_timeout: int = 300
|
|
111
112
|
skills_dir: Optional[str] = None
|
|
112
113
|
memory_dir: Optional[str] = None
|
|
113
114
|
|
|
@@ -167,6 +168,7 @@ class DeepAgent:
|
|
|
167
168
|
"""
|
|
168
169
|
current_tool_calls = []
|
|
169
170
|
ai_chunk_buffer = ""
|
|
171
|
+
ai_text_content = ""
|
|
170
172
|
seen_tool_call_ids = set()
|
|
171
173
|
stream_step = 0
|
|
172
174
|
# 兜底:累积流式 chunk 中的 usage_metadata(middleware 在流式场景可能拿不到)
|
|
@@ -210,6 +212,12 @@ class DeepAgent:
|
|
|
210
212
|
if usage_meta:
|
|
211
213
|
total_input_tokens += usage_meta.get("input_tokens", 0)
|
|
212
214
|
total_output_tokens += usage_meta.get("output_tokens", 0)
|
|
215
|
+
if usage_meta.get("input_tokens", 0) > 0:
|
|
216
|
+
SYLogger.debug(
|
|
217
|
+
f"[DeepAgent] usage_metadata | input={usage_meta.get('input_tokens', 0)} "
|
|
218
|
+
f"output={usage_meta.get('output_tokens', 0)} "
|
|
219
|
+
f"total={usage_meta.get('total_tokens', 0)} "
|
|
220
|
+
f"cumulative_input={total_input_tokens} step={stream_step}")
|
|
213
221
|
|
|
214
222
|
if msg_type == "AIMessageChunk":
|
|
215
223
|
tool_calls_log = getattr(msg, "tool_calls", [])
|
|
@@ -230,9 +238,32 @@ class DeepAgent:
|
|
|
230
238
|
f"[DeepAgent] AIMessage | content={repr(content_log)} | tools={tc_names}")
|
|
231
239
|
elif msg_type == "ToolMessage":
|
|
232
240
|
content_log = (msg.content or "")
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
241
|
+
if isinstance(content_log, list):
|
|
242
|
+
# e.g. [{'type': 'image', 'base64': '...'}]
|
|
243
|
+
preview_parts = []
|
|
244
|
+
total_len = 0
|
|
245
|
+
for item in content_log:
|
|
246
|
+
if isinstance(item, dict):
|
|
247
|
+
t = item.get('type', '?')
|
|
248
|
+
if t == 'image' and 'base64' in item:
|
|
249
|
+
b64 = item['base64']
|
|
250
|
+
total_len += len(b64)
|
|
251
|
+
preview_parts.append(
|
|
252
|
+
f"{{'type': 'image', 'base64': '{b64[:100]}...'({len(b64)} chars)}}")
|
|
253
|
+
else:
|
|
254
|
+
s = str(item)
|
|
255
|
+
total_len += len(s)
|
|
256
|
+
preview_parts.append(s[:100])
|
|
257
|
+
else:
|
|
258
|
+
s = str(item)
|
|
259
|
+
total_len += len(s)
|
|
260
|
+
preview_parts.append(s[:100])
|
|
261
|
+
print(
|
|
262
|
+
f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={total_len} | preview={preview_parts}")
|
|
263
|
+
else:
|
|
264
|
+
preview = str(content_log)[:100]
|
|
265
|
+
print(
|
|
266
|
+
f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={len(str(content_log))} | preview={repr(preview)}")
|
|
236
267
|
elif msg_type == "HumanMessage":
|
|
237
268
|
content_log = (msg.content or "")[:100]
|
|
238
269
|
print(
|
|
@@ -373,6 +404,7 @@ class DeepAgent:
|
|
|
373
404
|
|
|
374
405
|
if content:
|
|
375
406
|
ai_chunk_buffer += content
|
|
407
|
+
ai_text_content += content
|
|
376
408
|
event = ChatEventBuilder.ai_chunk(
|
|
377
409
|
content, id=getattr(msg, "id", None),
|
|
378
410
|
agent=DEFAULT_AGENT_NAME)
|
|
@@ -429,6 +461,13 @@ class DeepAgent:
|
|
|
429
461
|
print(
|
|
430
462
|
f"[DeepAgent] AI chunk done | {repr(ai_chunk_buffer[:100])}...")
|
|
431
463
|
|
|
464
|
+
# 空响应检测:模型被调用但没有产出任何文本
|
|
465
|
+
if not ai_text_content and not ai_chunk_buffer:
|
|
466
|
+
SYLogger.warning(
|
|
467
|
+
f"[DeepAgent] 空响应警告:模型未返回任何文本内容。"
|
|
468
|
+
f"stream_step={stream_step}, tool_calls={len(current_tool_calls)}, "
|
|
469
|
+
f"input_tokens={total_input_tokens}, output_tokens={total_output_tokens}")
|
|
470
|
+
|
|
432
471
|
# 兜底:如果 middleware 没有成功记录(流式场景),在这里补充记录
|
|
433
472
|
if total_input_tokens > 0 or total_output_tokens > 0:
|
|
434
473
|
try:
|
|
@@ -484,11 +523,19 @@ class DeepAgent:
|
|
|
484
523
|
max_retries: int = 3,
|
|
485
524
|
base_delay: float = 1.0,
|
|
486
525
|
) -> AsyncGenerator:
|
|
487
|
-
"""带重试机制的 astream
|
|
526
|
+
"""带重试机制的 astream
|
|
527
|
+
|
|
528
|
+
网络错误指数退避重试,BadRequestError 不重试。
|
|
529
|
+
上下文溢出由 deepagents SummarizationMiddleware 在模型调用层处理
|
|
530
|
+
(捕获 ContextOverflowError 后压缩消息并重试 LLM 调用),
|
|
531
|
+
不在此处重试整个 graph。
|
|
532
|
+
"""
|
|
488
533
|
import httpx
|
|
489
534
|
from openai import APIConnectionError, APIError, APITimeoutError
|
|
490
535
|
|
|
491
536
|
last_error = None
|
|
537
|
+
sandbox_retries = 0
|
|
538
|
+
max_sandbox_retries = 3
|
|
492
539
|
|
|
493
540
|
while True:
|
|
494
541
|
for attempt in range(max_retries):
|
|
@@ -511,8 +558,9 @@ class DeepAgent:
|
|
|
511
558
|
else:
|
|
512
559
|
raise last_error
|
|
513
560
|
except RuntimeError as e:
|
|
514
|
-
if "沙箱服务不可用" in str(e) and self.recovery_manager:
|
|
515
|
-
|
|
561
|
+
if "沙箱服务不可用" in str(e) and self.recovery_manager and sandbox_retries < max_sandbox_retries:
|
|
562
|
+
sandbox_retries += 1
|
|
563
|
+
SYLogger.warning(f"[DeepAgent] 沙箱服务不可用,尝试恢复 ({sandbox_retries}/{max_sandbox_retries})...")
|
|
516
564
|
recovered = await self.recovery_manager.recover()
|
|
517
565
|
if recovered:
|
|
518
566
|
SYLogger.info("[DeepAgent] 沙箱已恢复,继续执行")
|
|
@@ -584,7 +632,8 @@ async def create_deep_agent(
|
|
|
584
632
|
# 创建 agent
|
|
585
633
|
from deepagents import create_deep_agent as _create_deep_agent
|
|
586
634
|
|
|
587
|
-
#
|
|
635
|
+
# 创建 compact_conversation 工具 middleware
|
|
636
|
+
# 自动压缩由 deepagents 内置的 SummarizationMiddleware 处理(通过 model.profile 获取阈值)
|
|
588
637
|
summarization_mw = build_summarization_middleware(
|
|
589
638
|
model, config.model_name, sandbox_backend,
|
|
590
639
|
)
|
|
@@ -598,6 +647,7 @@ async def create_deep_agent(
|
|
|
598
647
|
"debug": config.debug,
|
|
599
648
|
"middleware": [
|
|
600
649
|
BackgroundExecutionMiddleware(backend=sandbox_backend),
|
|
650
|
+
ToolResultTruncationMiddleware(),
|
|
601
651
|
TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
|
|
602
652
|
summarization_mw,
|
|
603
653
|
],
|
sycommon/agent/multi_agent_team.py
CHANGED
|
@@ -44,6 +44,7 @@ from sycommon.agent.sandbox.sandbox_recovery import SandboxRecoveryManager
|
|
|
44
44
|
from sycommon.agent.chat_events import ChatEvent, ChatEventBuilder, DEFAULT_AGENT_NAME
|
|
45
45
|
from sycommon.middleware.background_execution import BackgroundExecutionMiddleware
|
|
46
46
|
from sycommon.middleware.token_tracking import TokenTrackingMiddleware
|
|
47
|
+
from sycommon.middleware.tool_result_truncation import ToolResultTruncationMiddleware
|
|
47
48
|
from deepagents.middleware.summarization import create_summarization_tool_middleware # noqa: F401 保留 re-export
|
|
48
49
|
from sycommon.agent.summarization_utils import build_summarization_middleware
|
|
49
50
|
|
|
@@ -165,7 +166,7 @@ class TeamConfig(BaseModel):
|
|
|
165
166
|
coordinator_name: str = "项目经理"
|
|
166
167
|
|
|
167
168
|
sandbox_service_name: str = "shengye-platform-sandbox"
|
|
168
|
-
sandbox_timeout: int =
|
|
169
|
+
sandbox_timeout: int = 300
|
|
169
170
|
skills_dir: Optional[str] = None
|
|
170
171
|
memory_dir: Optional[str] = None
|
|
171
172
|
|
|
@@ -583,10 +584,12 @@ async def create_multi_agent_team(
|
|
|
583
584
|
from deepagents import create_deep_agent
|
|
584
585
|
from deepagents.middleware.subagents import CompiledSubAgent
|
|
585
586
|
|
|
587
|
+
summarization_mw = build_summarization_middleware(model, config.model_name, sandbox_backend)
|
|
586
588
|
middleware = [
|
|
587
589
|
BackgroundExecutionMiddleware(backend=sandbox_backend),
|
|
590
|
+
ToolResultTruncationMiddleware(),
|
|
588
591
|
TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
|
|
589
|
-
|
|
592
|
+
summarization_mw,
|
|
590
593
|
]
|
|
591
594
|
shared = config.shared_tools or [get_current_date]
|
|
592
595
|
|
|
@@ -627,6 +630,7 @@ async def create_multi_agent_team(
|
|
|
627
630
|
|
|
628
631
|
# 创建协调者 Agent
|
|
629
632
|
coord_name = config.coordinator_name
|
|
633
|
+
coord_summarization_mw = build_summarization_middleware(model, config.model_name, sandbox_backend)
|
|
630
634
|
coordinator_agent = create_deep_agent(
|
|
631
635
|
model=model,
|
|
632
636
|
tools=config.shared_tools or [get_current_date],
|
|
@@ -640,8 +644,9 @@ async def create_multi_agent_team(
|
|
|
640
644
|
name=coord_name,
|
|
641
645
|
middleware=[
|
|
642
646
|
BackgroundExecutionMiddleware(backend=sandbox_backend),
|
|
647
|
+
ToolResultTruncationMiddleware(),
|
|
643
648
|
TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
|
|
644
|
-
|
|
649
|
+
coord_summarization_mw,
|
|
645
650
|
],
|
|
646
651
|
)
|
|
647
652
|
|
sycommon/agent/sandbox/file_ops.py
CHANGED
|
@@ -266,8 +266,12 @@ class FileOperationsMixin:
|
|
|
266
266
|
"limit": limit
|
|
267
267
|
}, timeout=timeout)
|
|
268
268
|
if result.get("error"):
|
|
269
|
-
|
|
270
|
-
|
|
269
|
+
err_msg = result['error']
|
|
270
|
+
if 'not found' in err_msg:
|
|
271
|
+
SYLogger.warning(f"[Sandbox] 异步读取文件未找到: {err_msg}")
|
|
272
|
+
else:
|
|
273
|
+
SYLogger.error(f"[Sandbox] 异步读取文件失败: {err_msg}")
|
|
274
|
+
return ReadResult(error=err_msg)
|
|
271
275
|
content = result.get("content", "")
|
|
272
276
|
encoding = result.get("encoding", "utf-8")
|
|
273
277
|
SYLogger.info(
|
|
@@ -329,7 +333,11 @@ class FileOperationsMixin:
|
|
|
329
333
|
path=result.get("path")
|
|
330
334
|
)
|
|
331
335
|
if write_result.error:
|
|
332
|
-
|
|
336
|
+
err_msg = write_result.error
|
|
337
|
+
if 'already exists' in err_msg:
|
|
338
|
+
SYLogger.warning(f"[Sandbox] 异步写入文件已存在: {err_msg}")
|
|
339
|
+
else:
|
|
340
|
+
SYLogger.error(f"[Sandbox] 异步写入失败: {err_msg}")
|
|
333
341
|
else:
|
|
334
342
|
SYLogger.info(f"[Sandbox] 异步写入成功: {write_result.path}")
|
|
335
343
|
return write_result
|
sycommon/agent/summarization_utils.py
CHANGED
|
@@ -1,46 +1,164 @@
|
|
|
1
1
|
"""上下文压缩 middleware 构建工具。
|
|
2
2
|
|
|
3
|
-
根据 nacos 中配置的模型 maxTokens,用绝对 token
|
|
4
|
-
|
|
3
|
+
根据 nacos 中配置的模型 maxTokens,用绝对 token 数设置压缩阈值。
|
|
4
|
+
优先使用模型 API 返回的 usage_metadata.total_tokens(真实 token 数),
|
|
5
|
+
无 metadata 时回退到 chars_per_token=2.0 的估算值。
|
|
6
|
+
同时增加基于消息数的安全阈值,防止估算偏低导致压缩不触发。
|
|
5
7
|
"""
|
|
6
8
|
|
|
7
9
|
from __future__ import annotations
|
|
10
|
+
import deepagents.middleware.summarization as _summ_mod
|
|
8
11
|
|
|
12
|
+
import functools
|
|
13
|
+
import logging
|
|
9
14
|
from typing import TYPE_CHECKING
|
|
10
15
|
|
|
11
16
|
from deepagents.middleware.summarization import (
|
|
12
17
|
SummarizationMiddleware,
|
|
13
18
|
SummarizationToolMiddleware,
|
|
14
19
|
)
|
|
20
|
+
from langchain_core.messages.utils import count_tokens_approximately
|
|
15
21
|
|
|
16
22
|
if TYPE_CHECKING:
|
|
17
23
|
from langchain_core.language_models import BaseChatModel
|
|
18
24
|
from deepagents.backends.protocol import BACKEND_TYPES
|
|
19
25
|
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _extract_last_usage_total_tokens(messages) -> int:
|
|
30
|
+
"""从消息历史中提取最后一条 AIMessage 的 usage_metadata.total_tokens。
|
|
31
|
+
|
|
32
|
+
返回 0 表示无数据(需回退到估算)。
|
|
33
|
+
"""
|
|
34
|
+
from langchain_core.messages import AIMessage
|
|
35
|
+
for msg in reversed(messages):
|
|
36
|
+
if isinstance(msg, AIMessage):
|
|
37
|
+
meta = getattr(msg, 'usage_metadata', None)
|
|
38
|
+
if meta and isinstance(meta, dict):
|
|
39
|
+
total = meta.get('total_tokens', 0)
|
|
40
|
+
if isinstance(total, int) and total > 0:
|
|
41
|
+
return total
|
|
42
|
+
return 0
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _patched_compute_summarization_defaults(model):
|
|
46
|
+
"""覆盖 deepagents 默认值,返回中文场景修正后的绝对 token 阈值。
|
|
47
|
+
|
|
48
|
+
同时增加基于消息数的安全阈值:即使 token 估算偏低,
|
|
49
|
+
消息数超过 200 条时也会触发压缩(覆盖工具 schema 等未计入的开销)。
|
|
50
|
+
"""
|
|
51
|
+
try:
|
|
52
|
+
from sycommon.config.Config import Config
|
|
53
|
+
model_name = getattr(model, 'model_name', None) or getattr(
|
|
54
|
+
model, 'model', None)
|
|
55
|
+
if model_name:
|
|
56
|
+
llm_cfg = Config().get_llm_config(model_name)
|
|
57
|
+
max_tokens = llm_cfg.get("maxTokens", 72000)
|
|
58
|
+
else:
|
|
59
|
+
max_tokens = 72000
|
|
60
|
+
except Exception:
|
|
61
|
+
max_tokens = 72000
|
|
62
|
+
|
|
63
|
+
# 60% 触发(120K/200K):实测模型在 input≈137K 时开始退化,
|
|
64
|
+
# 在 120K 触发压缩留 ~17K 安全余量给工具 schema 等未计入开销
|
|
65
|
+
trigger = int(max_tokens * 0.60)
|
|
66
|
+
keep = int(max_tokens * 0.10)
|
|
67
|
+
return {
|
|
68
|
+
"trigger": [("tokens", trigger), ("messages", 200)],
|
|
69
|
+
"keep": ("tokens", keep),
|
|
70
|
+
"truncate_args_settings": {
|
|
71
|
+
"trigger": ("tokens", trigger),
|
|
72
|
+
"keep": ("tokens", keep),
|
|
73
|
+
},
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# monkey-patch:替换 deepagents 的默认计算函数
|
|
78
|
+
_summ_mod.compute_summarization_defaults = _patched_compute_summarization_defaults
|
|
79
|
+
|
|
80
|
+
# monkey-patch:在内置 middleware 的 awrap_model_call 中注入真实 token + 日志
|
|
81
|
+
_OrigDeepAgentsSumm = _summ_mod._DeepAgentsSummarizationMiddleware
|
|
82
|
+
_orig_awrap_model_call = _OrigDeepAgentsSumm.awrap_model_call
|
|
83
|
+
|
|
84
|
+
# 基础估算函数,用于日志对比
|
|
85
|
+
_approx_counter = functools.partial(
|
|
86
|
+
count_tokens_approximately, chars_per_token=2.0)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
async def _patched_awrap_model_call(self, request, handler):
|
|
90
|
+
effective_messages = self._get_effective_messages(request)
|
|
91
|
+
truncated_messages, _ = self._truncate_args(
|
|
92
|
+
effective_messages, request.system_message, request.tools,
|
|
93
|
+
)
|
|
94
|
+
counted_messages = [request.system_message, *
|
|
95
|
+
truncated_messages] if request.system_message is not None else truncated_messages
|
|
96
|
+
|
|
97
|
+
# 从截断前的 effective_messages 提取真实 token(截断会丢失 usage_metadata)
|
|
98
|
+
real_tokens = _extract_last_usage_total_tokens(effective_messages)
|
|
99
|
+
|
|
100
|
+
# 估算值(用于日志对比)
|
|
101
|
+
try:
|
|
102
|
+
estimated = _approx_counter(counted_messages, tools=request.tools)
|
|
103
|
+
except TypeError:
|
|
104
|
+
estimated = _approx_counter(counted_messages)
|
|
105
|
+
|
|
106
|
+
# 如果有真实 token,临时替换 token_counter 使 _orig 内部判断也用真实值
|
|
107
|
+
# 这样 _should_summarize 和 _determine_cutoff_index 都能拿到正确的 token 数
|
|
108
|
+
if real_tokens > 0:
|
|
109
|
+
original_counter = self.token_counter
|
|
110
|
+
|
|
111
|
+
def _real_counter(msgs, **kwargs):
|
|
112
|
+
# 优先从当前消息中提取真实值(压缩后的消息可能有新的 metadata)
|
|
113
|
+
r = _extract_last_usage_total_tokens(msgs)
|
|
114
|
+
return r if r > 0 else real_tokens
|
|
115
|
+
|
|
116
|
+
self._lc_helper.token_counter = _real_counter
|
|
117
|
+
try:
|
|
118
|
+
result = await _orig_awrap_model_call(self, request, handler)
|
|
119
|
+
finally:
|
|
120
|
+
self._lc_helper.token_counter = original_counter
|
|
121
|
+
else:
|
|
122
|
+
result = await _orig_awrap_model_call(self, request, handler)
|
|
123
|
+
|
|
124
|
+
# 日志
|
|
125
|
+
source = 'real' if real_tokens > 0 else 'estimated'
|
|
126
|
+
should = self._should_summarize(truncated_messages, real_tokens if real_tokens > 0 else estimated)
|
|
127
|
+
print(
|
|
128
|
+
f"[TokenCount] real={real_tokens} estimated={estimated} "
|
|
129
|
+
f"source={source} msgs={len(counted_messages)} "
|
|
130
|
+
f"should_summarize={should} "
|
|
131
|
+
f"trigger={getattr(self._lc_helper, 'trigger', '?')}")
|
|
132
|
+
return result
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
_OrigDeepAgentsSumm.awrap_model_call = _patched_awrap_model_call
|
|
136
|
+
|
|
20
137
|
|
|
21
138
|
def build_summarization_middleware(
|
|
22
139
|
model: BaseChatModel,
|
|
23
140
|
model_name: str,
|
|
24
141
|
backend: "BACKEND_TYPES",
|
|
25
142
|
*,
|
|
26
|
-
trigger_fraction: float = 0.
|
|
143
|
+
trigger_fraction: float = 0.60,
|
|
27
144
|
keep_fraction: float = 0.10,
|
|
28
145
|
default_max_tokens: int = 200000,
|
|
29
146
|
) -> SummarizationToolMiddleware:
|
|
30
|
-
"""
|
|
147
|
+
"""根据模型上下文窗口大小构建 compact_conversation 工具 middleware。
|
|
31
148
|
|
|
32
|
-
|
|
149
|
+
优先使用模型返回的 usage_metadata 真实 token 数进行压缩判断,
|
|
150
|
+
无 usage_metadata 时回退到 chars_per_token=2.0 估算。
|
|
33
151
|
|
|
34
152
|
Args:
|
|
35
153
|
model: LLM 实例。
|
|
36
154
|
model_name: 模型名称(用于从 nacos 读取配置)。
|
|
37
155
|
backend: 后端实例。
|
|
38
|
-
trigger_fraction:
|
|
39
|
-
keep_fraction:
|
|
156
|
+
trigger_fraction: 触发压缩占有效输入的比例,默认 60%。
|
|
157
|
+
keep_fraction: 压缩后保留占有效输入的比例,默认 10%。
|
|
40
158
|
default_max_tokens: 无法从配置读取时的默认上下文窗口大小。
|
|
41
159
|
|
|
42
160
|
Returns:
|
|
43
|
-
SummarizationToolMiddleware
|
|
161
|
+
SummarizationToolMiddleware 实例(提供 compact_conversation 工具)。
|
|
44
162
|
"""
|
|
45
163
|
try:
|
|
46
164
|
from sycommon.config.Config import Config
|
|
@@ -58,11 +176,17 @@ def build_summarization_middleware(
|
|
|
58
176
|
backend=backend,
|
|
59
177
|
trigger=("tokens", trigger_tokens),
|
|
60
178
|
keep=("tokens", keep_tokens),
|
|
179
|
+
token_counter=functools.partial(
|
|
180
|
+
count_tokens_approximately, chars_per_token=2.0),
|
|
61
181
|
trim_tokens_to_summarize=None,
|
|
62
182
|
truncate_args_settings={
|
|
63
183
|
"trigger": ("tokens", trigger_tokens),
|
|
64
184
|
"keep": ("tokens", keep_tokens),
|
|
65
|
-
"max_length": 2000,
|
|
66
185
|
},
|
|
67
186
|
)
|
|
187
|
+
|
|
188
|
+
print(f"[Summarization] compact_conversation 工具配置: model={model_name}, "
|
|
189
|
+
f"max_tokens={max_tokens}, "
|
|
190
|
+
f"trigger={trigger_tokens} tokens ({trigger_fraction:.0%}), "
|
|
191
|
+
f"keep={keep_tokens} tokens ({keep_fraction:.0%})")
|
|
68
192
|
return SummarizationToolMiddleware(summ)
|
sycommon/config/LLMConfig.py
CHANGED
sycommon/llm/get_llm.py
CHANGED
|
@@ -153,7 +153,7 @@ def get_llm(
|
|
|
153
153
|
|
|
154
154
|
# 根据 wrap_structured 决定默认超时
|
|
155
155
|
if timeout is _TIMEOUT_UNSET:
|
|
156
|
-
timeout = 180 if wrap_structured else
|
|
156
|
+
timeout = 180 if wrap_structured else 300
|
|
157
157
|
|
|
158
158
|
init_params = {
|
|
159
159
|
"model_provider": llmConfig.provider,
|
|
@@ -165,9 +165,13 @@ def get_llm(
|
|
|
165
165
|
"streaming": streaming,
|
|
166
166
|
"timeout": timeout,
|
|
167
167
|
"max_retries": max_retries,
|
|
168
|
-
"stream_chunk_timeout":
|
|
168
|
+
"stream_chunk_timeout": 180,
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
+
# 传入 maxOutputTokens(max_completion_tokens)确保模型有足够的输出 token 空间
|
|
172
|
+
if llmConfig.maxOutputTokens:
|
|
173
|
+
init_params["max_tokens"] = llmConfig.maxOutputTokens
|
|
174
|
+
|
|
171
175
|
# 合并其他透传参数(包括 presence_penalty, extra_body, top_p 等)
|
|
172
176
|
init_params.update(kwargs)
|
|
173
177
|
|
sycommon/llm/llm_with_token_tracking.py
CHANGED
|
@@ -230,6 +230,23 @@ class LLMWithTokenTracking(BaseChatModel):
|
|
|
230
230
|
"output_tokens": total_output,
|
|
231
231
|
})
|
|
232
232
|
|
|
233
|
+
def _resolve_model_profile(self):
|
|
234
|
+
"""从 llmConfig.maxTokens 构建 profile,供 deepagents 计算压缩阈值使用。"""
|
|
235
|
+
if self.llmConfig and self.llmConfig.maxTokens:
|
|
236
|
+
return {"max_input_tokens": self.llmConfig.maxTokens}
|
|
237
|
+
if self.llm and hasattr(self.llm, 'profile') and self.llm.profile:
|
|
238
|
+
return self.llm.profile
|
|
239
|
+
return None
|
|
240
|
+
|
|
241
|
+
@property
|
|
242
|
+
def profile(self):
|
|
243
|
+
p = self._resolve_model_profile()
|
|
244
|
+
return p
|
|
245
|
+
|
|
246
|
+
@profile.setter
|
|
247
|
+
def profile(self, value):
|
|
248
|
+
pass
|
|
249
|
+
|
|
233
250
|
def bind_tools(self, tools, *, tool_choice=None, **kwargs):
|
|
234
251
|
"""绑定工具 - 委托给底层 LLM,返回绑定后的 Runnable"""
|
|
235
252
|
return self.llm.bind_tools(tools, tool_choice=tool_choice, **kwargs)
|
sycommon/llm/struct_token.py
CHANGED
|
@@ -345,7 +345,6 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
345
345
|
try:
|
|
346
346
|
with self.langfuse.start_as_current_observation(as_type="span", name="invoke") as span:
|
|
347
347
|
with propagate_attributes(session_id=trace_id, user_id=user_id):
|
|
348
|
-
span.update_trace(user_id=user_id, session_id=trace_id)
|
|
349
348
|
return self._execute_chain(input, config, trace_id, user_id, span)
|
|
350
349
|
except Exception as e:
|
|
351
350
|
# Langfuse 跟踪失败不应阻断业务,降级执行
|
|
@@ -366,7 +365,6 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
366
365
|
try:
|
|
367
366
|
with self.langfuse.start_as_current_observation(as_type="span", name="ainvoke") as span:
|
|
368
367
|
with propagate_attributes(session_id=trace_id, user_id=user_id):
|
|
369
|
-
span.update_trace(user_id=user_id, session_id=trace_id)
|
|
370
368
|
return await self._aexecute_chain(input, config, trace_id, user_id, span)
|
|
371
369
|
except Exception as e:
|
|
372
370
|
# Langfuse 跟踪失败不应阻断业务,降级执行
|
|
@@ -405,7 +403,7 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
405
403
|
input_data = {"messages": adapted_input}
|
|
406
404
|
|
|
407
405
|
if span:
|
|
408
|
-
span.
|
|
406
|
+
span.update(input=input_data)
|
|
409
407
|
|
|
410
408
|
structured_result = self.retry_chain.invoke(
|
|
411
409
|
input_data,
|
|
@@ -413,7 +411,7 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
413
411
|
)
|
|
414
412
|
|
|
415
413
|
if span:
|
|
416
|
-
span.
|
|
414
|
+
span.update(output=structured_result)
|
|
417
415
|
|
|
418
416
|
token_usage = token_handler.usage_metadata
|
|
419
417
|
structured_result._token_usage_ = token_usage
|
|
@@ -447,10 +445,10 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
447
445
|
|
|
448
446
|
# 2. 检查并执行上下文压缩 (仅在异步模式且开启时)
|
|
449
447
|
if self.enable_compression:
|
|
450
|
-
max_tokens = self.llmConfig.maxTokens
|
|
448
|
+
max_tokens = int(self.llmConfig.maxTokens * self.threshold_ratio)
|
|
451
449
|
current_tokens = self._count_tokens(adapted_input)
|
|
452
450
|
|
|
453
|
-
if current_tokens > max_tokens
|
|
451
|
+
if current_tokens > max_tokens:
|
|
454
452
|
SYLogger.warning(
|
|
455
453
|
f"⚠️ Context limit reached: {current_tokens}/{max_tokens}")
|
|
456
454
|
# 执行压缩,替换 adapted_input
|
|
@@ -465,7 +463,7 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
465
463
|
input_data = {"messages": adapted_input}
|
|
466
464
|
|
|
467
465
|
if span:
|
|
468
|
-
span.
|
|
466
|
+
span.update(input=input_data)
|
|
469
467
|
|
|
470
468
|
# 3. 调用子链
|
|
471
469
|
structured_result = await self.retry_chain.ainvoke(
|
|
@@ -474,7 +472,7 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
474
472
|
)
|
|
475
473
|
|
|
476
474
|
if span:
|
|
477
|
-
span.
|
|
475
|
+
span.update(output=structured_result)
|
|
478
476
|
|
|
479
477
|
token_usage = token_handler.usage_metadata
|
|
480
478
|
structured_result._token_usage_ = token_usage
|
sycommon/llm/sy_langfuse.py
CHANGED
|
@@ -3,11 +3,26 @@ from typing import Tuple, List, Optional, Any, Dict
|
|
|
3
3
|
from langfuse import Langfuse, get_client
|
|
4
4
|
from sycommon.config.Config import Config, SingletonMeta
|
|
5
5
|
from sycommon.logging.kafka_log import SYLogger
|
|
6
|
-
from langfuse.langchain import CallbackHandler
|
|
6
|
+
from langfuse.langchain import CallbackHandler as _LangfuseCallbackHandler
|
|
7
7
|
from sycommon.tools.env import get_env_var
|
|
8
8
|
from sycommon.tools.merge_headers import get_header_value
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
class SafeLangfuseCallbackHandler(_LangfuseCallbackHandler):
|
|
12
|
+
"""Wraps Langfuse CallbackHandler to handle unpicklable error objects.
|
|
13
|
+
|
|
14
|
+
LangGraph errors (e.g. Command/Interrupt dataclasses) may hold
|
|
15
|
+
_asyncio.Task references. When Langfuse calls str(error) → asdict() →
|
|
16
|
+
deepcopy(), this raises TypeError: cannot pickle '_asyncio.Task' object.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def on_chain_error(self, error, **kwargs):
|
|
20
|
+
try:
|
|
21
|
+
super().on_chain_error(error, **kwargs)
|
|
22
|
+
except (TypeError, ValueError):
|
|
23
|
+
pass
|
|
24
|
+
|
|
25
|
+
|
|
11
26
|
class LangfuseInitializer(metaclass=SingletonMeta):
|
|
12
27
|
"""
|
|
13
28
|
Langfuse 初始化管理器
|
|
@@ -46,14 +61,14 @@ class LangfuseInitializer(metaclass=SingletonMeta):
|
|
|
46
61
|
'baseUrl', '')
|
|
47
62
|
os.environ["LANGFUSE_TRACING_ENVIRONMENT"] = environment
|
|
48
63
|
os.environ["OTEL_SERVICE_NAME"] = server_name
|
|
49
|
-
#
|
|
50
|
-
os.environ["
|
|
51
|
-
|
|
52
|
-
os.environ["
|
|
64
|
+
# 控制 Langfuse OTLP 导出:超时 30s、每批最多 32 个 span、每 10s 强制 flush
|
|
65
|
+
os.environ["LANGFUSE_TIMEOUT"] = "30"
|
|
66
|
+
os.environ["LANGFUSE_FLUSH_AT"] = "32"
|
|
67
|
+
os.environ["LANGFUSE_FLUSH_INTERVAL"] = "10"
|
|
53
68
|
|
|
54
69
|
self._langfuse_client = get_client()
|
|
55
70
|
|
|
56
|
-
langfuse_handler =
|
|
71
|
+
langfuse_handler = SafeLangfuseCallbackHandler()
|
|
57
72
|
self._base_callbacks.append(langfuse_handler)
|
|
58
73
|
|
|
59
74
|
SYLogger.info(f"Langfuse 初始化成功 [Service: {server_name}]")
|