sycommon-python-lib 0.2.2a19__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sycommon/agent/deep_agent.py +56 -8
- sycommon/agent/multi_agent_team.py +5 -3
- sycommon/agent/sandbox/file_ops.py +11 -3
- sycommon/agent/summarization_utils.py +133 -9
- sycommon/config/LLMConfig.py +1 -0
- sycommon/llm/get_llm.py +6 -2
- sycommon/llm/llm_with_token_tracking.py +17 -0
- sycommon/llm/struct_token.py +2 -2
- sycommon/llm/sy_langfuse.py +21 -6
- sycommon/middleware/tool_result_truncation.py +56 -0
- sycommon/models/mqlistener_config.py +1 -1
- sycommon/rabbitmq/rabbitmq_client.py +56 -8
- sycommon/tests/test_real_summarization.py +166 -0
- sycommon/tests/test_summarization_config.py +463 -0
- sycommon/tests/test_summarization_real.py +329 -0
- {sycommon_python_lib-0.2.2a19.dist-info → sycommon_python_lib-0.2.3.dist-info}/METADATA +10 -10
- {sycommon_python_lib-0.2.2a19.dist-info → sycommon_python_lib-0.2.3.dist-info}/RECORD +20 -17
- {sycommon_python_lib-0.2.2a19.dist-info → sycommon_python_lib-0.2.3.dist-info}/WHEEL +0 -0
- {sycommon_python_lib-0.2.2a19.dist-info → sycommon_python_lib-0.2.3.dist-info}/entry_points.txt +0 -0
- {sycommon_python_lib-0.2.2a19.dist-info → sycommon_python_lib-0.2.3.dist-info}/top_level.txt +0 -0
sycommon/agent/deep_agent.py
CHANGED
|
@@ -108,7 +108,7 @@ class AgentConfig(BaseModel):
|
|
|
108
108
|
|
|
109
109
|
# 沙箱配置
|
|
110
110
|
sandbox_service_name: str = "shengye-platform-sandbox"
|
|
111
|
-
sandbox_timeout: int =
|
|
111
|
+
sandbox_timeout: int = 300
|
|
112
112
|
skills_dir: Optional[str] = None
|
|
113
113
|
memory_dir: Optional[str] = None
|
|
114
114
|
|
|
@@ -168,6 +168,7 @@ class DeepAgent:
|
|
|
168
168
|
"""
|
|
169
169
|
current_tool_calls = []
|
|
170
170
|
ai_chunk_buffer = ""
|
|
171
|
+
ai_text_content = ""
|
|
171
172
|
seen_tool_call_ids = set()
|
|
172
173
|
stream_step = 0
|
|
173
174
|
# 兜底:累积流式 chunk 中的 usage_metadata(middleware 在流式场景可能拿不到)
|
|
@@ -211,6 +212,12 @@ class DeepAgent:
|
|
|
211
212
|
if usage_meta:
|
|
212
213
|
total_input_tokens += usage_meta.get("input_tokens", 0)
|
|
213
214
|
total_output_tokens += usage_meta.get("output_tokens", 0)
|
|
215
|
+
if usage_meta.get("input_tokens", 0) > 0:
|
|
216
|
+
SYLogger.debug(
|
|
217
|
+
f"[DeepAgent] usage_metadata | input={usage_meta.get('input_tokens', 0)} "
|
|
218
|
+
f"output={usage_meta.get('output_tokens', 0)} "
|
|
219
|
+
f"total={usage_meta.get('total_tokens', 0)} "
|
|
220
|
+
f"cumulative_input={total_input_tokens} step={stream_step}")
|
|
214
221
|
|
|
215
222
|
if msg_type == "AIMessageChunk":
|
|
216
223
|
tool_calls_log = getattr(msg, "tool_calls", [])
|
|
@@ -231,9 +238,32 @@ class DeepAgent:
|
|
|
231
238
|
f"[DeepAgent] AIMessage | content={repr(content_log)} | tools={tc_names}")
|
|
232
239
|
elif msg_type == "ToolMessage":
|
|
233
240
|
content_log = (msg.content or "")
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
241
|
+
if isinstance(content_log, list):
|
|
242
|
+
# e.g. [{'type': 'image', 'base64': '...'}]
|
|
243
|
+
preview_parts = []
|
|
244
|
+
total_len = 0
|
|
245
|
+
for item in content_log:
|
|
246
|
+
if isinstance(item, dict):
|
|
247
|
+
t = item.get('type', '?')
|
|
248
|
+
if t == 'image' and 'base64' in item:
|
|
249
|
+
b64 = item['base64']
|
|
250
|
+
total_len += len(b64)
|
|
251
|
+
preview_parts.append(
|
|
252
|
+
f"{{'type': 'image', 'base64': '{b64[:100]}...'({len(b64)} chars)}}")
|
|
253
|
+
else:
|
|
254
|
+
s = str(item)
|
|
255
|
+
total_len += len(s)
|
|
256
|
+
preview_parts.append(s[:100])
|
|
257
|
+
else:
|
|
258
|
+
s = str(item)
|
|
259
|
+
total_len += len(s)
|
|
260
|
+
preview_parts.append(s[:100])
|
|
261
|
+
print(
|
|
262
|
+
f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={total_len} | preview={preview_parts}")
|
|
263
|
+
else:
|
|
264
|
+
preview = str(content_log)[:100]
|
|
265
|
+
print(
|
|
266
|
+
f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={len(str(content_log))} | preview={repr(preview)}")
|
|
237
267
|
elif msg_type == "HumanMessage":
|
|
238
268
|
content_log = (msg.content or "")[:100]
|
|
239
269
|
print(
|
|
@@ -374,6 +404,7 @@ class DeepAgent:
|
|
|
374
404
|
|
|
375
405
|
if content:
|
|
376
406
|
ai_chunk_buffer += content
|
|
407
|
+
ai_text_content += content
|
|
377
408
|
event = ChatEventBuilder.ai_chunk(
|
|
378
409
|
content, id=getattr(msg, "id", None),
|
|
379
410
|
agent=DEFAULT_AGENT_NAME)
|
|
@@ -430,6 +461,13 @@ class DeepAgent:
|
|
|
430
461
|
print(
|
|
431
462
|
f"[DeepAgent] AI chunk done | {repr(ai_chunk_buffer[:100])}...")
|
|
432
463
|
|
|
464
|
+
# 空响应检测:模型被调用但没有产出任何文本
|
|
465
|
+
if not ai_text_content and not ai_chunk_buffer:
|
|
466
|
+
SYLogger.warning(
|
|
467
|
+
f"[DeepAgent] 空响应警告:模型未返回任何文本内容。"
|
|
468
|
+
f"stream_step={stream_step}, tool_calls={len(current_tool_calls)}, "
|
|
469
|
+
f"input_tokens={total_input_tokens}, output_tokens={total_output_tokens}")
|
|
470
|
+
|
|
433
471
|
# 兜底:如果 middleware 没有成功记录(流式场景),在这里补充记录
|
|
434
472
|
if total_input_tokens > 0 or total_output_tokens > 0:
|
|
435
473
|
try:
|
|
@@ -485,11 +523,19 @@ class DeepAgent:
|
|
|
485
523
|
max_retries: int = 3,
|
|
486
524
|
base_delay: float = 1.0,
|
|
487
525
|
) -> AsyncGenerator:
|
|
488
|
-
"""带重试机制的 astream
|
|
526
|
+
"""带重试机制的 astream
|
|
527
|
+
|
|
528
|
+
网络错误指数退避重试,BadRequestError 不重试。
|
|
529
|
+
上下文溢出由 deepagents SummarizationMiddleware 在模型调用层处理
|
|
530
|
+
(捕获 ContextOverflowError 后压缩消息并重试 LLM 调用),
|
|
531
|
+
不在此处重试整个 graph。
|
|
532
|
+
"""
|
|
489
533
|
import httpx
|
|
490
534
|
from openai import APIConnectionError, APIError, APITimeoutError
|
|
491
535
|
|
|
492
536
|
last_error = None
|
|
537
|
+
sandbox_retries = 0
|
|
538
|
+
max_sandbox_retries = 3
|
|
493
539
|
|
|
494
540
|
while True:
|
|
495
541
|
for attempt in range(max_retries):
|
|
@@ -512,8 +558,9 @@ class DeepAgent:
|
|
|
512
558
|
else:
|
|
513
559
|
raise last_error
|
|
514
560
|
except RuntimeError as e:
|
|
515
|
-
if "沙箱服务不可用" in str(e) and self.recovery_manager:
|
|
516
|
-
|
|
561
|
+
if "沙箱服务不可用" in str(e) and self.recovery_manager and sandbox_retries < max_sandbox_retries:
|
|
562
|
+
sandbox_retries += 1
|
|
563
|
+
SYLogger.warning(f"[DeepAgent] 沙箱服务不可用,尝试恢复 ({sandbox_retries}/{max_sandbox_retries})...")
|
|
517
564
|
recovered = await self.recovery_manager.recover()
|
|
518
565
|
if recovered:
|
|
519
566
|
SYLogger.info("[DeepAgent] 沙箱已恢复,继续执行")
|
|
@@ -585,7 +632,8 @@ async def create_deep_agent(
|
|
|
585
632
|
# 创建 agent
|
|
586
633
|
from deepagents import create_deep_agent as _create_deep_agent
|
|
587
634
|
|
|
588
|
-
#
|
|
635
|
+
# 创建 compact_conversation 工具 middleware
|
|
636
|
+
# 自动压缩由 deepagents 内置的 SummarizationMiddleware 处理(通过 model.profile 获取阈值)
|
|
589
637
|
summarization_mw = build_summarization_middleware(
|
|
590
638
|
model, config.model_name, sandbox_backend,
|
|
591
639
|
)
|
|
@@ -166,7 +166,7 @@ class TeamConfig(BaseModel):
|
|
|
166
166
|
coordinator_name: str = "项目经理"
|
|
167
167
|
|
|
168
168
|
sandbox_service_name: str = "shengye-platform-sandbox"
|
|
169
|
-
sandbox_timeout: int =
|
|
169
|
+
sandbox_timeout: int = 300
|
|
170
170
|
skills_dir: Optional[str] = None
|
|
171
171
|
memory_dir: Optional[str] = None
|
|
172
172
|
|
|
@@ -584,11 +584,12 @@ async def create_multi_agent_team(
|
|
|
584
584
|
from deepagents import create_deep_agent
|
|
585
585
|
from deepagents.middleware.subagents import CompiledSubAgent
|
|
586
586
|
|
|
587
|
+
summarization_mw = build_summarization_middleware(model, config.model_name, sandbox_backend)
|
|
587
588
|
middleware = [
|
|
588
589
|
BackgroundExecutionMiddleware(backend=sandbox_backend),
|
|
589
590
|
ToolResultTruncationMiddleware(),
|
|
590
591
|
TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
|
|
591
|
-
|
|
592
|
+
summarization_mw,
|
|
592
593
|
]
|
|
593
594
|
shared = config.shared_tools or [get_current_date]
|
|
594
595
|
|
|
@@ -629,6 +630,7 @@ async def create_multi_agent_team(
|
|
|
629
630
|
|
|
630
631
|
# 创建协调者 Agent
|
|
631
632
|
coord_name = config.coordinator_name
|
|
633
|
+
coord_summarization_mw = build_summarization_middleware(model, config.model_name, sandbox_backend)
|
|
632
634
|
coordinator_agent = create_deep_agent(
|
|
633
635
|
model=model,
|
|
634
636
|
tools=config.shared_tools or [get_current_date],
|
|
@@ -644,7 +646,7 @@ async def create_multi_agent_team(
|
|
|
644
646
|
BackgroundExecutionMiddleware(backend=sandbox_backend),
|
|
645
647
|
ToolResultTruncationMiddleware(),
|
|
646
648
|
TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
|
|
647
|
-
|
|
649
|
+
coord_summarization_mw,
|
|
648
650
|
],
|
|
649
651
|
)
|
|
650
652
|
|
|
@@ -266,8 +266,12 @@ class FileOperationsMixin:
|
|
|
266
266
|
"limit": limit
|
|
267
267
|
}, timeout=timeout)
|
|
268
268
|
if result.get("error"):
|
|
269
|
-
|
|
270
|
-
|
|
269
|
+
err_msg = result['error']
|
|
270
|
+
if 'not found' in err_msg:
|
|
271
|
+
SYLogger.warning(f"[Sandbox] 异步读取文件未找到: {err_msg}")
|
|
272
|
+
else:
|
|
273
|
+
SYLogger.error(f"[Sandbox] 异步读取文件失败: {err_msg}")
|
|
274
|
+
return ReadResult(error=err_msg)
|
|
271
275
|
content = result.get("content", "")
|
|
272
276
|
encoding = result.get("encoding", "utf-8")
|
|
273
277
|
SYLogger.info(
|
|
@@ -329,7 +333,11 @@ class FileOperationsMixin:
|
|
|
329
333
|
path=result.get("path")
|
|
330
334
|
)
|
|
331
335
|
if write_result.error:
|
|
332
|
-
|
|
336
|
+
err_msg = write_result.error
|
|
337
|
+
if 'already exists' in err_msg:
|
|
338
|
+
SYLogger.warning(f"[Sandbox] 异步写入文件已存在: {err_msg}")
|
|
339
|
+
else:
|
|
340
|
+
SYLogger.error(f"[Sandbox] 异步写入失败: {err_msg}")
|
|
333
341
|
else:
|
|
334
342
|
SYLogger.info(f"[Sandbox] 异步写入成功: {write_result.path}")
|
|
335
343
|
return write_result
|
|
@@ -1,46 +1,164 @@
|
|
|
1
1
|
"""上下文压缩 middleware 构建工具。
|
|
2
2
|
|
|
3
|
-
根据 nacos 中配置的模型 maxTokens,用绝对 token
|
|
4
|
-
|
|
3
|
+
根据 nacos 中配置的模型 maxTokens,用绝对 token 数设置压缩阈值。
|
|
4
|
+
优先使用模型 API 返回的 usage_metadata.total_tokens(真实 token 数),
|
|
5
|
+
无 metadata 时回退到 chars_per_token=2.0 的估算值。
|
|
6
|
+
同时增加基于消息数的安全阈值,防止估算偏低导致压缩不触发。
|
|
5
7
|
"""
|
|
6
8
|
|
|
7
9
|
from __future__ import annotations
|
|
10
|
+
import deepagents.middleware.summarization as _summ_mod
|
|
8
11
|
|
|
12
|
+
import functools
|
|
13
|
+
import logging
|
|
9
14
|
from typing import TYPE_CHECKING
|
|
10
15
|
|
|
11
16
|
from deepagents.middleware.summarization import (
|
|
12
17
|
SummarizationMiddleware,
|
|
13
18
|
SummarizationToolMiddleware,
|
|
14
19
|
)
|
|
20
|
+
from langchain_core.messages.utils import count_tokens_approximately
|
|
15
21
|
|
|
16
22
|
if TYPE_CHECKING:
|
|
17
23
|
from langchain_core.language_models import BaseChatModel
|
|
18
24
|
from deepagents.backends.protocol import BACKEND_TYPES
|
|
19
25
|
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _extract_last_usage_total_tokens(messages) -> int:
    """Return the newest positive ``usage_metadata.total_tokens`` found on an AIMessage.

    ``messages`` is scanned from the end towards the start. Only ``AIMessage``
    instances are considered; an AIMessage whose ``usage_metadata`` is missing,
    is not a dict, or carries a non-positive / non-int ``total_tokens`` is
    skipped and the scan continues with older messages.

    Returns:
        The first matching ``total_tokens`` value, or 0 when no message
        qualifies (callers treat 0 as "no data, fall back to estimation").
    """
    from langchain_core.messages import AIMessage

    for candidate in reversed(messages):
        if not isinstance(candidate, AIMessage):
            continue
        usage = getattr(candidate, 'usage_metadata', None)
        if not usage or not isinstance(usage, dict):
            continue
        token_total = usage.get('total_tokens', 0)
        if isinstance(token_total, int) and token_total > 0:
            return token_total
    return 0
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _patched_compute_summarization_defaults(model):
    """Compute absolute-token summarization defaults from the configured context size.

    The model name is read from ``model.model_name`` (or ``model.model``) and
    used to look up ``maxTokens`` through ``Config().get_llm_config``. When the
    lookup is impossible, fails, or yields an unusable value, 72000 is used.

    Args:
        model: Object that may expose ``model_name`` or ``model``.

    Returns:
        A dict with:
        - ``trigger``: a token threshold at 60% of the context size plus a
          message-count threshold of 200, so that compression can still fire
          when the token estimate is too low.
        - ``keep``: a token budget at 10% of the context size.
        - ``truncate_args_settings``: the same token trigger/keep pair.
    """
    fallback_max_tokens = 72000
    max_tokens = fallback_max_tokens
    try:
        from sycommon.config.Config import Config
        model_name = getattr(model, 'model_name', None) or getattr(
            model, 'model', None)
        if model_name:
            llm_cfg = Config().get_llm_config(model_name)
            max_tokens = llm_cfg.get("maxTokens", fallback_max_tokens)
    except Exception:
        # Best-effort lookup: any config failure falls back to the default,
        # but leave a trace so a misconfiguration is diagnosable.
        logging.getLogger(__name__).debug(
            "summarization defaults: config lookup failed, using %s",
            fallback_max_tokens, exc_info=True)
        max_tokens = fallback_max_tokens

    # The configured value may be None, a numeric string, or non-positive;
    # none of those can be turned into meaningful thresholds.
    try:
        max_tokens = int(max_tokens)
    except (TypeError, ValueError):
        max_tokens = fallback_max_tokens
    if max_tokens <= 0:
        max_tokens = fallback_max_tokens

    # Trigger at 60% of the window (e.g. 120K of 200K). Per the original
    # author's note, the model was observed to degrade at input≈137K, so
    # triggering at 120K leaves ~17K of headroom for overhead that is not
    # counted, such as tool schemas.
    trigger = int(max_tokens * 0.60)
    keep = int(max_tokens * 0.10)
    return {
        "trigger": [("tokens", trigger), ("messages", 200)],
        "keep": ("tokens", keep),
        "truncate_args_settings": {
            "trigger": ("tokens", trigger),
            "keep": ("tokens", keep),
        },
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# monkey-patch:替换 deepagents 的默认计算函数
|
|
78
|
+
_summ_mod.compute_summarization_defaults = _patched_compute_summarization_defaults
|
|
79
|
+
|
|
80
|
+
# monkey-patch:在内置 middleware 的 awrap_model_call 中注入真实 token + 日志
|
|
81
|
+
_OrigDeepAgentsSumm = _summ_mod._DeepAgentsSummarizationMiddleware
|
|
82
|
+
_orig_awrap_model_call = _OrigDeepAgentsSumm.awrap_model_call
|
|
83
|
+
|
|
84
|
+
# 基础估算函数,用于日志对比
|
|
85
|
+
_approx_counter = functools.partial(
|
|
86
|
+
count_tokens_approximately, chars_per_token=2.0)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
async def _patched_awrap_model_call(self, request, handler):
    """Run the original ``awrap_model_call`` with real token usage injected, then log.

    When the message history carries a positive ``usage_metadata.total_tokens``,
    the helper's ``token_counter`` is temporarily replaced by a counter that
    reports that real value, so the original implementation's internal checks
    use it instead of the character-based estimate. The previous counter is
    always restored afterwards. A ``[TokenCount]`` line comparing the real and
    estimated counts is printed after the call.

    Args:
        self: The summarization middleware instance being patched.
        request: The model request (``system_message`` and ``tools`` are read).
        handler: The downstream handler, passed through unchanged.

    Returns:
        Whatever the original ``awrap_model_call`` returns.
    """
    effective_messages = self._get_effective_messages(request)
    truncated_messages, _ = self._truncate_args(
        effective_messages, request.system_message, request.tools,
    )
    counted_messages = [request.system_message, *
                        truncated_messages] if request.system_message is not None else truncated_messages

    # Read real usage from the messages *before* truncation (the original
    # author notes that truncation loses usage_metadata).
    real_tokens = _extract_last_usage_total_tokens(effective_messages)

    # Estimated count, used only for the log comparison and as the fallback
    # input to _should_summarize below.
    try:
        estimated = _approx_counter(counted_messages, tools=request.tools)
    except TypeError:
        # Presumably a counter version that does not accept ``tools`` — TODO confirm.
        estimated = _approx_counter(counted_messages)

    if real_tokens > 0:
        # Save the attribute that is actually overwritten below, so the
        # restore in ``finally`` puts back exactly what was replaced.
        original_counter = self._lc_helper.token_counter

        def _real_counter(msgs, **kwargs):
            # Prefer usage found in the messages being counted (messages
            # produced after compression may carry newer metadata).
            # NOTE(review): this returns a whole-conversation total whatever
            # subset of messages is passed in; confirm that is acceptable for
            # any caller that counts partial message lists.
            r = _extract_last_usage_total_tokens(msgs)
            return r if r > 0 else real_tokens

        # NOTE(review): this mutates state on the middleware's helper across
        # an await; concurrent calls on the same instance could interleave.
        self._lc_helper.token_counter = _real_counter
        try:
            result = await _orig_awrap_model_call(self, request, handler)
        finally:
            self._lc_helper.token_counter = original_counter
    else:
        result = await _orig_awrap_model_call(self, request, handler)

    # Diagnostics
    source = 'real' if real_tokens > 0 else 'estimated'
    should = self._should_summarize(truncated_messages, real_tokens if real_tokens > 0 else estimated)
    print(
        f"[TokenCount] real={real_tokens} estimated={estimated} "
        f"source={source} msgs={len(counted_messages)} "
        f"should_summarize={should} "
        f"trigger={getattr(self._lc_helper, 'trigger', '?')}")
    return result
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
_OrigDeepAgentsSumm.awrap_model_call = _patched_awrap_model_call
|
|
136
|
+
|
|
20
137
|
|
|
21
138
|
def build_summarization_middleware(
|
|
22
139
|
model: BaseChatModel,
|
|
23
140
|
model_name: str,
|
|
24
141
|
backend: "BACKEND_TYPES",
|
|
25
142
|
*,
|
|
26
|
-
trigger_fraction: float = 0.
|
|
143
|
+
trigger_fraction: float = 0.60,
|
|
27
144
|
keep_fraction: float = 0.10,
|
|
28
145
|
default_max_tokens: int = 200000,
|
|
29
146
|
) -> SummarizationToolMiddleware:
|
|
30
|
-
"""
|
|
147
|
+
"""根据模型上下文窗口大小构建 compact_conversation 工具 middleware。
|
|
31
148
|
|
|
32
|
-
|
|
149
|
+
优先使用模型返回的 usage_metadata 真实 token 数进行压缩判断,
|
|
150
|
+
无 usage_metadata 时回退到 chars_per_token=2.0 估算。
|
|
33
151
|
|
|
34
152
|
Args:
|
|
35
153
|
model: LLM 实例。
|
|
36
154
|
model_name: 模型名称(用于从 nacos 读取配置)。
|
|
37
155
|
backend: 后端实例。
|
|
38
|
-
trigger_fraction:
|
|
39
|
-
keep_fraction:
|
|
156
|
+
trigger_fraction: 触发压缩占有效输入的比例,默认 60%。
|
|
157
|
+
keep_fraction: 压缩后保留占有效输入的比例,默认 10%。
|
|
40
158
|
default_max_tokens: 无法从配置读取时的默认上下文窗口大小。
|
|
41
159
|
|
|
42
160
|
Returns:
|
|
43
|
-
SummarizationToolMiddleware
|
|
161
|
+
SummarizationToolMiddleware 实例(提供 compact_conversation 工具)。
|
|
44
162
|
"""
|
|
45
163
|
try:
|
|
46
164
|
from sycommon.config.Config import Config
|
|
@@ -58,11 +176,17 @@ def build_summarization_middleware(
|
|
|
58
176
|
backend=backend,
|
|
59
177
|
trigger=("tokens", trigger_tokens),
|
|
60
178
|
keep=("tokens", keep_tokens),
|
|
179
|
+
token_counter=functools.partial(
|
|
180
|
+
count_tokens_approximately, chars_per_token=2.0),
|
|
61
181
|
trim_tokens_to_summarize=None,
|
|
62
182
|
truncate_args_settings={
|
|
63
183
|
"trigger": ("tokens", trigger_tokens),
|
|
64
184
|
"keep": ("tokens", keep_tokens),
|
|
65
|
-
"max_length": 2000,
|
|
66
185
|
},
|
|
67
186
|
)
|
|
187
|
+
|
|
188
|
+
print(f"[Summarization] compact_conversation 工具配置: model={model_name}, "
|
|
189
|
+
f"max_tokens={max_tokens}, "
|
|
190
|
+
f"trigger={trigger_tokens} tokens ({trigger_fraction:.0%}), "
|
|
191
|
+
f"keep={keep_tokens} tokens ({keep_fraction:.0%})")
|
|
68
192
|
return SummarizationToolMiddleware(summ)
|
sycommon/config/LLMConfig.py
CHANGED
sycommon/llm/get_llm.py
CHANGED
|
@@ -153,7 +153,7 @@ def get_llm(
|
|
|
153
153
|
|
|
154
154
|
# 根据 wrap_structured 决定默认超时
|
|
155
155
|
if timeout is _TIMEOUT_UNSET:
|
|
156
|
-
timeout = 180 if wrap_structured else
|
|
156
|
+
timeout = 180 if wrap_structured else 300
|
|
157
157
|
|
|
158
158
|
init_params = {
|
|
159
159
|
"model_provider": llmConfig.provider,
|
|
@@ -165,9 +165,13 @@ def get_llm(
|
|
|
165
165
|
"streaming": streaming,
|
|
166
166
|
"timeout": timeout,
|
|
167
167
|
"max_retries": max_retries,
|
|
168
|
-
"stream_chunk_timeout":
|
|
168
|
+
"stream_chunk_timeout": 180,
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
+
# 传入 maxOutputTokens(max_completion_tokens)确保模型有足够的输出 token 空间
|
|
172
|
+
if llmConfig.maxOutputTokens:
|
|
173
|
+
init_params["max_tokens"] = llmConfig.maxOutputTokens
|
|
174
|
+
|
|
171
175
|
# 合并其他透传参数(包括 presence_penalty, extra_body, top_p 等)
|
|
172
176
|
init_params.update(kwargs)
|
|
173
177
|
|
|
@@ -230,6 +230,23 @@ class LLMWithTokenTracking(BaseChatModel):
|
|
|
230
230
|
"output_tokens": total_output,
|
|
231
231
|
})
|
|
232
232
|
|
|
233
|
+
def _resolve_model_profile(self):
    """Resolve the model profile, preferring ``llmConfig.maxTokens``.

    Returns ``{"max_input_tokens": maxTokens}`` when ``self.llmConfig`` has a
    truthy ``maxTokens`` (per the original docstring, this is what deepagents
    uses to compute compression thresholds). Otherwise returns the wrapped
    LLM's own truthy ``profile``, or ``None`` when neither is available.
    """
    cfg = self.llmConfig
    if cfg and cfg.maxTokens:
        return {"max_input_tokens": cfg.maxTokens}

    inner = self.llm
    if not inner or not hasattr(inner, 'profile'):
        return None
    inner_profile = inner.profile
    return inner_profile if inner_profile else None
|
|
240
|
+
|
|
241
|
+
@property
def profile(self):
    """Model profile as computed by ``_resolve_model_profile`` on every access."""
    return self._resolve_model_profile()


@profile.setter
def profile(self, value):
    """Accept and discard assignments; the profile is always derived, never stored.

    NOTE(review): presumably present so code that assigns ``profile`` does not
    fail on a read-only property — confirm against the base class.
    """
    return None
|
|
249
|
+
|
|
233
250
|
def bind_tools(self, tools, *, tool_choice=None, **kwargs):
|
|
234
251
|
"""绑定工具 - 委托给底层 LLM,返回绑定后的 Runnable"""
|
|
235
252
|
return self.llm.bind_tools(tools, tool_choice=tool_choice, **kwargs)
|
sycommon/llm/struct_token.py
CHANGED
|
@@ -445,10 +445,10 @@ class StructuredRunnableWithToken(Runnable):
|
|
|
445
445
|
|
|
446
446
|
# 2. 检查并执行上下文压缩 (仅在异步模式且开启时)
|
|
447
447
|
if self.enable_compression:
|
|
448
|
-
max_tokens = self.llmConfig.maxTokens
|
|
448
|
+
max_tokens = int(self.llmConfig.maxTokens * self.threshold_ratio)
|
|
449
449
|
current_tokens = self._count_tokens(adapted_input)
|
|
450
450
|
|
|
451
|
-
if current_tokens > max_tokens
|
|
451
|
+
if current_tokens > max_tokens:
|
|
452
452
|
SYLogger.warning(
|
|
453
453
|
f"⚠️ Context limit reached: {current_tokens}/{max_tokens}")
|
|
454
454
|
# 执行压缩,替换 adapted_input
|
sycommon/llm/sy_langfuse.py
CHANGED
|
@@ -3,11 +3,26 @@ from typing import Tuple, List, Optional, Any, Dict
|
|
|
3
3
|
from langfuse import Langfuse, get_client
|
|
4
4
|
from sycommon.config.Config import Config, SingletonMeta
|
|
5
5
|
from sycommon.logging.kafka_log import SYLogger
|
|
6
|
-
from langfuse.langchain import CallbackHandler
|
|
6
|
+
from langfuse.langchain import CallbackHandler as _LangfuseCallbackHandler
|
|
7
7
|
from sycommon.tools.env import get_env_var
|
|
8
8
|
from sycommon.tools.merge_headers import get_header_value
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
class SafeLangfuseCallbackHandler(_LangfuseCallbackHandler):
    """Langfuse CallbackHandler that tolerates unpicklable error objects.

    LangGraph errors (e.g. Command/Interrupt dataclasses) may hold
    _asyncio.Task references. When Langfuse calls str(error) → asdict() →
    deepcopy(), this raises TypeError: cannot pickle '_asyncio.Task' object.
    Such failures are suppressed here so that tracing cannot break the chain,
    but they are logged at debug level instead of being dropped silently.
    """

    def on_chain_error(self, error, **kwargs):
        """Forward to the parent handler, suppressing TypeError/ValueError.

        Args:
            error: The error reported by the chain.
            **kwargs: Passed through to the parent ``on_chain_error``.
        """
        try:
            super().on_chain_error(error, **kwargs)
        except (TypeError, ValueError) as exc:
            # Only the type name of ``error`` is logged: stringifying it is
            # what triggers the failure described in the class docstring.
            SYLogger.debug(
                f"[Langfuse] on_chain_error suppressed {type(exc).__name__}: {exc} "
                f"(chain error type: {type(error).__name__})")
|
|
24
|
+
|
|
25
|
+
|
|
11
26
|
class LangfuseInitializer(metaclass=SingletonMeta):
|
|
12
27
|
"""
|
|
13
28
|
Langfuse 初始化管理器
|
|
@@ -46,14 +61,14 @@ class LangfuseInitializer(metaclass=SingletonMeta):
|
|
|
46
61
|
'baseUrl', '')
|
|
47
62
|
os.environ["LANGFUSE_TRACING_ENVIRONMENT"] = environment
|
|
48
63
|
os.environ["OTEL_SERVICE_NAME"] = server_name
|
|
49
|
-
#
|
|
50
|
-
os.environ["
|
|
51
|
-
|
|
52
|
-
os.environ["
|
|
64
|
+
# 控制 Langfuse OTLP 导出:超时 30s、每批最多 32 个 span、每 10s 强制 flush
|
|
65
|
+
os.environ["LANGFUSE_TIMEOUT"] = "30"
|
|
66
|
+
os.environ["LANGFUSE_FLUSH_AT"] = "32"
|
|
67
|
+
os.environ["LANGFUSE_FLUSH_INTERVAL"] = "10"
|
|
53
68
|
|
|
54
69
|
self._langfuse_client = get_client()
|
|
55
70
|
|
|
56
|
-
langfuse_handler =
|
|
71
|
+
langfuse_handler = SafeLangfuseCallbackHandler()
|
|
57
72
|
self._base_callbacks.append(langfuse_handler)
|
|
58
73
|
|
|
59
74
|
SYLogger.info(f"Langfuse 初始化成功 [Service: {server_name}]")
|
|
@@ -5,6 +5,9 @@
|
|
|
5
5
|
|
|
6
6
|
通过 wrap_tool_call / awrap_tool_call 钩子拦截每个工具调用结果,
|
|
7
7
|
在结果进入对话历史之前完成截断。
|
|
8
|
+
|
|
9
|
+
同时处理 list 类型 content(如 read_file 读取图片返回的 base64 数据),
|
|
10
|
+
将其转换为字符串描述,防止上游 API 拒绝 list content 的 400 错误。
|
|
8
11
|
"""
|
|
9
12
|
|
|
10
13
|
from collections.abc import Awaitable, Callable
|
|
@@ -41,6 +44,38 @@ DEFAULT_TRUNCATION_SUFFIX = (
|
|
|
41
44
|
"2. 将输出重定向到文件后用 read_file 分段读取]"
|
|
42
45
|
)
|
|
43
46
|
|
|
47
|
+
# list 类型 content 中 base64 图片的最大字符数
|
|
48
|
+
MAX_IMAGE_BASE64_CHARS = 500
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _convert_list_content_to_str(content: list, tool_name: str) -> str:
    """Flatten a list-typed ``ToolMessage.content`` into one string.

    Each item is rendered as follows and the results are joined with newlines:

    - dict with ``type == "image"`` and a ``base64`` key: replaced by a notice
      (MIME type and approximate size in KB) telling the model to process the
      image through the ``execute`` tool instead of receiving binary data;
    - dict with a ``text`` key: the text itself (coerced to ``str`` if needed);
    - anything else: ``str(item)``.

    Args:
        content: The list of content items.
        tool_name: Name of the tool that produced the content. Not used by the
            conversion; kept so existing callers keep working.

    Returns:
        The newline-joined string (empty string for an empty list).
    """
    parts = []
    for item in content:
        if not isinstance(item, dict):
            parts.append(str(item))
            continue

        item_type = item.get("type", "text")
        if item_type == "image" and "base64" in item:
            # A None payload is treated as empty so len() cannot raise.
            b64 = item["base64"] or ""
            mime = item.get("mime_type", "image/unknown")
            size_kb = len(b64) * 3 // 4 // 1024  # base64 chars -> bytes -> KB
            parts.append(
                f"[图片文件 ({mime}, {size_kb}KB) — "
                f"图片二进制数据无法直接发送给模型。"
                f"请使用 execute 工具通过 Python (如 PIL/OpenCV) 读取并处理图片,"
                f"例如 OCR 识别文字、分析图片内容等。]"
            )
        elif "text" in item:
            text = item["text"]
            # str.join rejects non-string parts, so coerce defensively.
            parts.append(text if isinstance(text, str) else str(text))
        else:
            parts.append(str(item))
    return "\n".join(parts)
|
|
78
|
+
|
|
44
79
|
|
|
45
80
|
class ToolResultTruncationMiddleware(AgentMiddleware):
|
|
46
81
|
"""截断过长的工具结果,防止超出模型上下文窗口。
|
|
@@ -102,6 +137,27 @@ class ToolResultTruncationMiddleware(AgentMiddleware):
|
|
|
102
137
|
return result
|
|
103
138
|
|
|
104
139
|
content = result.content
|
|
140
|
+
|
|
141
|
+
# list 类型 content:如 read_file 读取图片返回的 base64 数据
|
|
142
|
+
# 转换为字符串描述,防止上游 API 拒绝 list content
|
|
143
|
+
if isinstance(content, list):
|
|
144
|
+
has_image = any(
|
|
145
|
+
isinstance(item, dict) and item.get("type") == "image" and "base64" in item
|
|
146
|
+
for item in content
|
|
147
|
+
)
|
|
148
|
+
if has_image:
|
|
149
|
+
new_content = _convert_list_content_to_str(content, tool_name)
|
|
150
|
+
SYLogger.info(
|
|
151
|
+
f"[ToolResultTruncation] tool='{tool_name}' converted image "
|
|
152
|
+
f"list content to string ({len(new_content)} chars)")
|
|
153
|
+
return ToolMessage(
|
|
154
|
+
content=new_content,
|
|
155
|
+
tool_call_id=result.tool_call_id,
|
|
156
|
+
name=result.name,
|
|
157
|
+
status=result.status,
|
|
158
|
+
artifact=result.artifact,
|
|
159
|
+
)
|
|
160
|
+
|
|
105
161
|
if not isinstance(content, str):
|
|
106
162
|
return result
|
|
107
163
|
|
|
@@ -31,7 +31,7 @@ class RabbitMQListenerConfig(BaseModel):
|
|
|
31
31
|
durable: bool = Field(True, description="是否持久化")
|
|
32
32
|
auto_delete: bool = Field(False, description="是否自动删除队列")
|
|
33
33
|
auto_parse_json: bool = Field(True, description="是否自动解析JSON消息")
|
|
34
|
-
prefetch_count: int = Field(2, description="
|
|
34
|
+
prefetch_count: int = Field(2, description="同时消费并发数(控制MQ预取与handler并发上限)")
|
|
35
35
|
|
|
36
36
|
class Config:
|
|
37
37
|
"""模型配置"""
|