sycommon-python-lib 0.2.2a18__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,6 +51,7 @@ from sycommon.agent.sandbox.sandbox_recovery import SandboxRecoveryManager
51
51
  from sycommon.agent.chat_events import ChatEvent, ChatEventBuilder, DEFAULT_AGENT_NAME
52
52
  from sycommon.middleware.background_execution import BackgroundExecutionMiddleware
53
53
  from sycommon.middleware.token_tracking import TokenTrackingMiddleware
54
+ from sycommon.middleware.tool_result_truncation import ToolResultTruncationMiddleware
54
55
  from deepagents.middleware.summarization import create_summarization_tool_middleware # noqa: F401 保留 re-export
55
56
  from sycommon.agent.summarization_utils import build_summarization_middleware
56
57
 
@@ -107,7 +108,7 @@ class AgentConfig(BaseModel):
107
108
 
108
109
  # 沙箱配置
109
110
  sandbox_service_name: str = "shengye-platform-sandbox"
110
- sandbox_timeout: int = 60
111
+ sandbox_timeout: int = 300
111
112
  skills_dir: Optional[str] = None
112
113
  memory_dir: Optional[str] = None
113
114
 
@@ -167,6 +168,7 @@ class DeepAgent:
167
168
  """
168
169
  current_tool_calls = []
169
170
  ai_chunk_buffer = ""
171
+ ai_text_content = ""
170
172
  seen_tool_call_ids = set()
171
173
  stream_step = 0
172
174
  # 兜底:累积流式 chunk 中的 usage_metadata(middleware 在流式场景可能拿不到)
@@ -210,6 +212,12 @@ class DeepAgent:
210
212
  if usage_meta:
211
213
  total_input_tokens += usage_meta.get("input_tokens", 0)
212
214
  total_output_tokens += usage_meta.get("output_tokens", 0)
215
+ if usage_meta.get("input_tokens", 0) > 0:
216
+ SYLogger.debug(
217
+ f"[DeepAgent] usage_metadata | input={usage_meta.get('input_tokens', 0)} "
218
+ f"output={usage_meta.get('output_tokens', 0)} "
219
+ f"total={usage_meta.get('total_tokens', 0)} "
220
+ f"cumulative_input={total_input_tokens} step={stream_step}")
213
221
 
214
222
  if msg_type == "AIMessageChunk":
215
223
  tool_calls_log = getattr(msg, "tool_calls", [])
@@ -230,9 +238,32 @@ class DeepAgent:
230
238
  f"[DeepAgent] AIMessage | content={repr(content_log)} | tools={tc_names}")
231
239
  elif msg_type == "ToolMessage":
232
240
  content_log = (msg.content or "")
233
- preview = content_log[:100]
234
- print(
235
- f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={len(content_log)} | preview={repr(preview)}")
241
+ if isinstance(content_log, list):
242
+ # e.g. [{'type': 'image', 'base64': '...'}]
243
+ preview_parts = []
244
+ total_len = 0
245
+ for item in content_log:
246
+ if isinstance(item, dict):
247
+ t = item.get('type', '?')
248
+ if t == 'image' and 'base64' in item:
249
+ b64 = item['base64']
250
+ total_len += len(b64)
251
+ preview_parts.append(
252
+ f"{{'type': 'image', 'base64': '{b64[:100]}...'({len(b64)} chars)}}")
253
+ else:
254
+ s = str(item)
255
+ total_len += len(s)
256
+ preview_parts.append(s[:100])
257
+ else:
258
+ s = str(item)
259
+ total_len += len(s)
260
+ preview_parts.append(s[:100])
261
+ print(
262
+ f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={total_len} | preview={preview_parts}")
263
+ else:
264
+ preview = str(content_log)[:100]
265
+ print(
266
+ f"[DeepAgent] ToolResult | {getattr(msg, 'name', '?')} | len={len(str(content_log))} | preview={repr(preview)}")
236
267
  elif msg_type == "HumanMessage":
237
268
  content_log = (msg.content or "")[:100]
238
269
  print(
@@ -373,6 +404,7 @@ class DeepAgent:
373
404
 
374
405
  if content:
375
406
  ai_chunk_buffer += content
407
+ ai_text_content += content
376
408
  event = ChatEventBuilder.ai_chunk(
377
409
  content, id=getattr(msg, "id", None),
378
410
  agent=DEFAULT_AGENT_NAME)
@@ -429,6 +461,13 @@ class DeepAgent:
429
461
  print(
430
462
  f"[DeepAgent] AI chunk done | {repr(ai_chunk_buffer[:100])}...")
431
463
 
464
+ # 空响应检测:模型被调用但没有产出任何文本
465
+ if not ai_text_content and not ai_chunk_buffer:
466
+ SYLogger.warning(
467
+ f"[DeepAgent] 空响应警告:模型未返回任何文本内容。"
468
+ f"stream_step={stream_step}, tool_calls={len(current_tool_calls)}, "
469
+ f"input_tokens={total_input_tokens}, output_tokens={total_output_tokens}")
470
+
432
471
  # 兜底:如果 middleware 没有成功记录(流式场景),在这里补充记录
433
472
  if total_input_tokens > 0 or total_output_tokens > 0:
434
473
  try:
@@ -484,11 +523,19 @@ class DeepAgent:
484
523
  max_retries: int = 3,
485
524
  base_delay: float = 1.0,
486
525
  ) -> AsyncGenerator:
487
- """带重试机制的 astream"""
526
+ """带重试机制的 astream
527
+
528
+ 网络错误指数退避重试,BadRequestError 不重试。
529
+ 上下文溢出由 deepagents SummarizationMiddleware 在模型调用层处理
530
+ (捕获 ContextOverflowError 后压缩消息并重试 LLM 调用),
531
+ 不在此处重试整个 graph。
532
+ """
488
533
  import httpx
489
534
  from openai import APIConnectionError, APIError, APITimeoutError
490
535
 
491
536
  last_error = None
537
+ sandbox_retries = 0
538
+ max_sandbox_retries = 3
492
539
 
493
540
  while True:
494
541
  for attempt in range(max_retries):
@@ -511,8 +558,9 @@ class DeepAgent:
511
558
  else:
512
559
  raise last_error
513
560
  except RuntimeError as e:
514
- if "沙箱服务不可用" in str(e) and self.recovery_manager:
515
- SYLogger.warning("[DeepAgent] 沙箱服务不可用,尝试恢复...")
561
+ if "沙箱服务不可用" in str(e) and self.recovery_manager and sandbox_retries < max_sandbox_retries:
562
+ sandbox_retries += 1
563
+ SYLogger.warning(f"[DeepAgent] 沙箱服务不可用,尝试恢复 ({sandbox_retries}/{max_sandbox_retries})...")
516
564
  recovered = await self.recovery_manager.recover()
517
565
  if recovered:
518
566
  SYLogger.info("[DeepAgent] 沙箱已恢复,继续执行")
@@ -584,7 +632,8 @@ async def create_deep_agent(
584
632
  # 创建 agent
585
633
  from deepagents import create_deep_agent as _create_deep_agent
586
634
 
587
- # 根据模型上下文窗口大小配置压缩阈值
635
+ # 创建 compact_conversation 工具 middleware
636
+ # 自动压缩由 deepagents 内置的 SummarizationMiddleware 处理(通过 model.profile 获取阈值)
588
637
  summarization_mw = build_summarization_middleware(
589
638
  model, config.model_name, sandbox_backend,
590
639
  )
@@ -598,6 +647,7 @@ async def create_deep_agent(
598
647
  "debug": config.debug,
599
648
  "middleware": [
600
649
  BackgroundExecutionMiddleware(backend=sandbox_backend),
650
+ ToolResultTruncationMiddleware(),
601
651
  TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
602
652
  summarization_mw,
603
653
  ],
@@ -44,6 +44,7 @@ from sycommon.agent.sandbox.sandbox_recovery import SandboxRecoveryManager
44
44
  from sycommon.agent.chat_events import ChatEvent, ChatEventBuilder, DEFAULT_AGENT_NAME
45
45
  from sycommon.middleware.background_execution import BackgroundExecutionMiddleware
46
46
  from sycommon.middleware.token_tracking import TokenTrackingMiddleware
47
+ from sycommon.middleware.tool_result_truncation import ToolResultTruncationMiddleware
47
48
  from deepagents.middleware.summarization import create_summarization_tool_middleware # noqa: F401 保留 re-export
48
49
  from sycommon.agent.summarization_utils import build_summarization_middleware
49
50
 
@@ -165,7 +166,7 @@ class TeamConfig(BaseModel):
165
166
  coordinator_name: str = "项目经理"
166
167
 
167
168
  sandbox_service_name: str = "shengye-platform-sandbox"
168
- sandbox_timeout: int = 60
169
+ sandbox_timeout: int = 300
169
170
  skills_dir: Optional[str] = None
170
171
  memory_dir: Optional[str] = None
171
172
 
@@ -583,10 +584,12 @@ async def create_multi_agent_team(
583
584
  from deepagents import create_deep_agent
584
585
  from deepagents.middleware.subagents import CompiledSubAgent
585
586
 
587
+ summarization_mw = build_summarization_middleware(model, config.model_name, sandbox_backend)
586
588
  middleware = [
587
589
  BackgroundExecutionMiddleware(backend=sandbox_backend),
590
+ ToolResultTruncationMiddleware(),
588
591
  TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
589
- build_summarization_middleware(model, config.model_name, sandbox_backend),
592
+ summarization_mw,
590
593
  ]
591
594
  shared = config.shared_tools or [get_current_date]
592
595
 
@@ -627,6 +630,7 @@ async def create_multi_agent_team(
627
630
 
628
631
  # 创建协调者 Agent
629
632
  coord_name = config.coordinator_name
633
+ coord_summarization_mw = build_summarization_middleware(model, config.model_name, sandbox_backend)
630
634
  coordinator_agent = create_deep_agent(
631
635
  model=model,
632
636
  tools=config.shared_tools or [get_current_date],
@@ -640,8 +644,9 @@ async def create_multi_agent_team(
640
644
  name=coord_name,
641
645
  middleware=[
642
646
  BackgroundExecutionMiddleware(backend=sandbox_backend),
647
+ ToolResultTruncationMiddleware(),
643
648
  TokenTrackingMiddleware(model_name=config.model_name, user_id=user_id),
644
- build_summarization_middleware(model, config.model_name, sandbox_backend),
649
+ coord_summarization_mw,
645
650
  ],
646
651
  )
647
652
 
@@ -266,8 +266,12 @@ class FileOperationsMixin:
266
266
  "limit": limit
267
267
  }, timeout=timeout)
268
268
  if result.get("error"):
269
- SYLogger.error(f"[Sandbox] 异步读取文件失败: {result['error']}")
270
- return ReadResult(error=result["error"])
269
+ err_msg = result['error']
270
+ if 'not found' in err_msg:
271
+ SYLogger.warning(f"[Sandbox] 异步读取文件未找到: {err_msg}")
272
+ else:
273
+ SYLogger.error(f"[Sandbox] 异步读取文件失败: {err_msg}")
274
+ return ReadResult(error=err_msg)
271
275
  content = result.get("content", "")
272
276
  encoding = result.get("encoding", "utf-8")
273
277
  SYLogger.info(
@@ -329,7 +333,11 @@ class FileOperationsMixin:
329
333
  path=result.get("path")
330
334
  )
331
335
  if write_result.error:
332
- SYLogger.error(f"[Sandbox] 异步写入失败: {write_result.error}")
336
+ err_msg = write_result.error
337
+ if 'already exists' in err_msg:
338
+ SYLogger.warning(f"[Sandbox] 异步写入文件已存在: {err_msg}")
339
+ else:
340
+ SYLogger.error(f"[Sandbox] 异步写入失败: {err_msg}")
333
341
  else:
334
342
  SYLogger.info(f"[Sandbox] 异步写入成功: {write_result.path}")
335
343
  return write_result
@@ -1,46 +1,164 @@
1
1
  """上下文压缩 middleware 构建工具。
2
2
 
3
- 根据 nacos 中配置的模型 maxTokens,用绝对 token 数设置压缩阈值,
4
- 避免依赖模型 profile 信息(部分模型不提供 profile)。
3
+ 根据 nacos 中配置的模型 maxTokens,用绝对 token 数设置压缩阈值。
4
+ 优先使用模型 API 返回的 usage_metadata.total_tokens(真实 token 数),
5
+ 无 metadata 时回退到 chars_per_token=2.0 的估算值。
6
+ 同时增加基于消息数的安全阈值,防止估算偏低导致压缩不触发。
5
7
  """
6
8
 
7
9
  from __future__ import annotations
10
+ import deepagents.middleware.summarization as _summ_mod
8
11
 
12
+ import functools
13
+ import logging
9
14
  from typing import TYPE_CHECKING
10
15
 
11
16
  from deepagents.middleware.summarization import (
12
17
  SummarizationMiddleware,
13
18
  SummarizationToolMiddleware,
14
19
  )
20
+ from langchain_core.messages.utils import count_tokens_approximately
15
21
 
16
22
  if TYPE_CHECKING:
17
23
  from langchain_core.language_models import BaseChatModel
18
24
  from deepagents.backends.protocol import BACKEND_TYPES
19
25
 
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ def _extract_last_usage_total_tokens(messages) -> int:
30
+ """从消息历史中提取最后一条 AIMessage 的 usage_metadata.total_tokens。
31
+
32
+ 返回 0 表示无数据(需回退到估算)。
33
+ """
34
+ from langchain_core.messages import AIMessage
35
+ for msg in reversed(messages):
36
+ if isinstance(msg, AIMessage):
37
+ meta = getattr(msg, 'usage_metadata', None)
38
+ if meta and isinstance(meta, dict):
39
+ total = meta.get('total_tokens', 0)
40
+ if isinstance(total, int) and total > 0:
41
+ return total
42
+ return 0
43
+
44
+
45
+ def _patched_compute_summarization_defaults(model):
46
+ """覆盖 deepagents 默认值,返回中文场景修正后的绝对 token 阈值。
47
+
48
+ 同时增加基于消息数的安全阈值:即使 token 估算偏低,
49
+ 消息数超过 200 条时也会触发压缩(覆盖工具 schema 等未计入的开销)。
50
+ """
51
+ try:
52
+ from sycommon.config.Config import Config
53
+ model_name = getattr(model, 'model_name', None) or getattr(
54
+ model, 'model', None)
55
+ if model_name:
56
+ llm_cfg = Config().get_llm_config(model_name)
57
+ max_tokens = llm_cfg.get("maxTokens", 72000)
58
+ else:
59
+ max_tokens = 72000
60
+ except Exception:
61
+ max_tokens = 72000
62
+
63
+ # 60% 触发(120K/200K):实测模型在 input≈137K 时开始退化,
64
+ # 在 120K 触发压缩留 ~17K 安全余量给工具 schema 等未计入开销
65
+ trigger = int(max_tokens * 0.60)
66
+ keep = int(max_tokens * 0.10)
67
+ return {
68
+ "trigger": [("tokens", trigger), ("messages", 200)],
69
+ "keep": ("tokens", keep),
70
+ "truncate_args_settings": {
71
+ "trigger": ("tokens", trigger),
72
+ "keep": ("tokens", keep),
73
+ },
74
+ }
75
+
76
+
77
+ # monkey-patch:替换 deepagents 的默认计算函数
78
+ _summ_mod.compute_summarization_defaults = _patched_compute_summarization_defaults
79
+
80
+ # monkey-patch:在内置 middleware 的 awrap_model_call 中注入真实 token + 日志
81
+ _OrigDeepAgentsSumm = _summ_mod._DeepAgentsSummarizationMiddleware
82
+ _orig_awrap_model_call = _OrigDeepAgentsSumm.awrap_model_call
83
+
84
+ # 基础估算函数,用于日志对比
85
+ _approx_counter = functools.partial(
86
+ count_tokens_approximately, chars_per_token=2.0)
87
+
88
+
89
+ async def _patched_awrap_model_call(self, request, handler):
90
+ effective_messages = self._get_effective_messages(request)
91
+ truncated_messages, _ = self._truncate_args(
92
+ effective_messages, request.system_message, request.tools,
93
+ )
94
+ counted_messages = [request.system_message, *
95
+ truncated_messages] if request.system_message is not None else truncated_messages
96
+
97
+ # 从截断前的 effective_messages 提取真实 token(截断会丢失 usage_metadata)
98
+ real_tokens = _extract_last_usage_total_tokens(effective_messages)
99
+
100
+ # 估算值(用于日志对比)
101
+ try:
102
+ estimated = _approx_counter(counted_messages, tools=request.tools)
103
+ except TypeError:
104
+ estimated = _approx_counter(counted_messages)
105
+
106
+ # 如果有真实 token,临时替换 token_counter 使 _orig 内部判断也用真实值
107
+ # 这样 _should_summarize 和 _determine_cutoff_index 都能拿到正确的 token 数
108
+ if real_tokens > 0:
109
+ original_counter = self.token_counter
110
+
111
+ def _real_counter(msgs, **kwargs):
112
+ # 优先从当前消息中提取真实值(压缩后的消息可能有新的 metadata)
113
+ r = _extract_last_usage_total_tokens(msgs)
114
+ return r if r > 0 else real_tokens
115
+
116
+ self._lc_helper.token_counter = _real_counter
117
+ try:
118
+ result = await _orig_awrap_model_call(self, request, handler)
119
+ finally:
120
+ self._lc_helper.token_counter = original_counter
121
+ else:
122
+ result = await _orig_awrap_model_call(self, request, handler)
123
+
124
+ # 日志
125
+ source = 'real' if real_tokens > 0 else 'estimated'
126
+ should = self._should_summarize(truncated_messages, real_tokens if real_tokens > 0 else estimated)
127
+ print(
128
+ f"[TokenCount] real={real_tokens} estimated={estimated} "
129
+ f"source={source} msgs={len(counted_messages)} "
130
+ f"should_summarize={should} "
131
+ f"trigger={getattr(self._lc_helper, 'trigger', '?')}")
132
+ return result
133
+
134
+
135
+ _OrigDeepAgentsSumm.awrap_model_call = _patched_awrap_model_call
136
+
20
137
 
21
138
  def build_summarization_middleware(
22
139
  model: BaseChatModel,
23
140
  model_name: str,
24
141
  backend: "BACKEND_TYPES",
25
142
  *,
26
- trigger_fraction: float = 0.85,
143
+ trigger_fraction: float = 0.60,
27
144
  keep_fraction: float = 0.10,
28
145
  default_max_tokens: int = 200000,
29
146
  ) -> SummarizationToolMiddleware:
30
- """根据模型上下文窗口大小构建压缩 middleware。
147
+ """根据模型上下文窗口大小构建 compact_conversation 工具 middleware。
31
148
 
32
- 使用绝对 token 数而非 fraction 模式,避免要求模型提供 profile 信息。
149
+ 优先使用模型返回的 usage_metadata 真实 token 数进行压缩判断,
150
+ 无 usage_metadata 时回退到 chars_per_token=2.0 估算。
33
151
 
34
152
  Args:
35
153
  model: LLM 实例。
36
154
  model_name: 模型名称(用于从 nacos 读取配置)。
37
155
  backend: 后端实例。
38
- trigger_fraction: 触发压缩的上下文窗口比例,默认 85%。
39
- keep_fraction: 压缩后保留的上下文窗口比例,默认 10%。
156
+ trigger_fraction: 触发压缩占有效输入的比例,默认 60%。
157
+ keep_fraction: 压缩后保留占有效输入的比例,默认 10%。
40
158
  default_max_tokens: 无法从配置读取时的默认上下文窗口大小。
41
159
 
42
160
  Returns:
43
- SummarizationToolMiddleware 实例(包含自动压缩 + compact_conversation 工具)。
161
+ SummarizationToolMiddleware 实例(提供 compact_conversation 工具)。
44
162
  """
45
163
  try:
46
164
  from sycommon.config.Config import Config
@@ -58,11 +176,17 @@ def build_summarization_middleware(
58
176
  backend=backend,
59
177
  trigger=("tokens", trigger_tokens),
60
178
  keep=("tokens", keep_tokens),
179
+ token_counter=functools.partial(
180
+ count_tokens_approximately, chars_per_token=2.0),
61
181
  trim_tokens_to_summarize=None,
62
182
  truncate_args_settings={
63
183
  "trigger": ("tokens", trigger_tokens),
64
184
  "keep": ("tokens", keep_tokens),
65
- "max_length": 2000,
66
185
  },
67
186
  )
187
+
188
+ print(f"[Summarization] compact_conversation 工具配置: model={model_name}, "
189
+ f"max_tokens={max_tokens}, "
190
+ f"trigger={trigger_tokens} tokens ({trigger_fraction:.0%}), "
191
+ f"keep={keep_tokens} tokens ({keep_fraction:.0%})")
68
192
  return SummarizationToolMiddleware(summ)
@@ -11,6 +11,7 @@ class LLMConfig(BaseModel):
11
11
  callFunction: bool
12
12
  default: Optional[bool] = False
13
13
  apiKey: Optional[str] = None
14
+ maxOutputTokens: Optional[int] = None
14
15
 
15
16
  @classmethod
16
17
  def from_config(cls, model_name: str):
sycommon/llm/get_llm.py CHANGED
@@ -153,7 +153,7 @@ def get_llm(
153
153
 
154
154
  # 根据 wrap_structured 决定默认超时
155
155
  if timeout is _TIMEOUT_UNSET:
156
- timeout = 180 if wrap_structured else None
156
+ timeout = 180 if wrap_structured else 300
157
157
 
158
158
  init_params = {
159
159
  "model_provider": llmConfig.provider,
@@ -165,9 +165,13 @@ def get_llm(
165
165
  "streaming": streaming,
166
166
  "timeout": timeout,
167
167
  "max_retries": max_retries,
168
- "stream_chunk_timeout": None,
168
+ "stream_chunk_timeout": 180,
169
169
  }
170
170
 
171
+ # 传入 maxOutputTokens(max_completion_tokens)确保模型有足够的输出 token 空间
172
+ if llmConfig.maxOutputTokens:
173
+ init_params["max_tokens"] = llmConfig.maxOutputTokens
174
+
171
175
  # 合并其他透传参数(包括 presence_penalty, extra_body, top_p 等)
172
176
  init_params.update(kwargs)
173
177
 
@@ -230,6 +230,23 @@ class LLMWithTokenTracking(BaseChatModel):
230
230
  "output_tokens": total_output,
231
231
  })
232
232
 
233
+ def _resolve_model_profile(self):
234
+ """从 llmConfig.maxTokens 构建 profile,供 deepagents 计算压缩阈值使用。"""
235
+ if self.llmConfig and self.llmConfig.maxTokens:
236
+ return {"max_input_tokens": self.llmConfig.maxTokens}
237
+ if self.llm and hasattr(self.llm, 'profile') and self.llm.profile:
238
+ return self.llm.profile
239
+ return None
240
+
241
+ @property
242
+ def profile(self):
243
+ p = self._resolve_model_profile()
244
+ return p
245
+
246
+ @profile.setter
247
+ def profile(self, value):
248
+ pass
249
+
233
250
  def bind_tools(self, tools, *, tool_choice=None, **kwargs):
234
251
  """绑定工具 - 委托给底层 LLM,返回绑定后的 Runnable"""
235
252
  return self.llm.bind_tools(tools, tool_choice=tool_choice, **kwargs)
@@ -345,7 +345,6 @@ class StructuredRunnableWithToken(Runnable):
345
345
  try:
346
346
  with self.langfuse.start_as_current_observation(as_type="span", name="invoke") as span:
347
347
  with propagate_attributes(session_id=trace_id, user_id=user_id):
348
- span.update_trace(user_id=user_id, session_id=trace_id)
349
348
  return self._execute_chain(input, config, trace_id, user_id, span)
350
349
  except Exception as e:
351
350
  # Langfuse 跟踪失败不应阻断业务,降级执行
@@ -366,7 +365,6 @@ class StructuredRunnableWithToken(Runnable):
366
365
  try:
367
366
  with self.langfuse.start_as_current_observation(as_type="span", name="ainvoke") as span:
368
367
  with propagate_attributes(session_id=trace_id, user_id=user_id):
369
- span.update_trace(user_id=user_id, session_id=trace_id)
370
368
  return await self._aexecute_chain(input, config, trace_id, user_id, span)
371
369
  except Exception as e:
372
370
  # Langfuse 跟踪失败不应阻断业务,降级执行
@@ -405,7 +403,7 @@ class StructuredRunnableWithToken(Runnable):
405
403
  input_data = {"messages": adapted_input}
406
404
 
407
405
  if span:
408
- span.update_trace(input=input_data)
406
+ span.update(input=input_data)
409
407
 
410
408
  structured_result = self.retry_chain.invoke(
411
409
  input_data,
@@ -413,7 +411,7 @@ class StructuredRunnableWithToken(Runnable):
413
411
  )
414
412
 
415
413
  if span:
416
- span.update_trace(output=structured_result)
414
+ span.update(output=structured_result)
417
415
 
418
416
  token_usage = token_handler.usage_metadata
419
417
  structured_result._token_usage_ = token_usage
@@ -447,10 +445,10 @@ class StructuredRunnableWithToken(Runnable):
447
445
 
448
446
  # 2. 检查并执行上下文压缩 (仅在异步模式且开启时)
449
447
  if self.enable_compression:
450
- max_tokens = self.llmConfig.maxTokens
448
+ max_tokens = int(self.llmConfig.maxTokens * self.threshold_ratio)
451
449
  current_tokens = self._count_tokens(adapted_input)
452
450
 
453
- if current_tokens > max_tokens * self.threshold_ratio:
451
+ if current_tokens > max_tokens:
454
452
  SYLogger.warning(
455
453
  f"⚠️ Context limit reached: {current_tokens}/{max_tokens}")
456
454
  # 执行压缩,替换 adapted_input
@@ -465,7 +463,7 @@ class StructuredRunnableWithToken(Runnable):
465
463
  input_data = {"messages": adapted_input}
466
464
 
467
465
  if span:
468
- span.update_trace(input=input_data)
466
+ span.update(input=input_data)
469
467
 
470
468
  # 3. 调用子链
471
469
  structured_result = await self.retry_chain.ainvoke(
@@ -474,7 +472,7 @@ class StructuredRunnableWithToken(Runnable):
474
472
  )
475
473
 
476
474
  if span:
477
- span.update_trace(output=structured_result)
475
+ span.update(output=structured_result)
478
476
 
479
477
  token_usage = token_handler.usage_metadata
480
478
  structured_result._token_usage_ = token_usage
@@ -3,11 +3,26 @@ from typing import Tuple, List, Optional, Any, Dict
3
3
  from langfuse import Langfuse, get_client
4
4
  from sycommon.config.Config import Config, SingletonMeta
5
5
  from sycommon.logging.kafka_log import SYLogger
6
- from langfuse.langchain import CallbackHandler
6
+ from langfuse.langchain import CallbackHandler as _LangfuseCallbackHandler
7
7
  from sycommon.tools.env import get_env_var
8
8
  from sycommon.tools.merge_headers import get_header_value
9
9
 
10
10
 
11
+ class SafeLangfuseCallbackHandler(_LangfuseCallbackHandler):
12
+ """Wraps Langfuse CallbackHandler to handle unpicklable error objects.
13
+
14
+ LangGraph errors (e.g. Command/Interrupt dataclasses) may hold
15
+ _asyncio.Task references. When Langfuse calls str(error) → asdict() →
16
+ deepcopy(), this raises TypeError: cannot pickle '_asyncio.Task' object.
17
+ """
18
+
19
+ def on_chain_error(self, error, **kwargs):
20
+ try:
21
+ super().on_chain_error(error, **kwargs)
22
+ except (TypeError, ValueError):
23
+ pass
24
+
25
+
11
26
  class LangfuseInitializer(metaclass=SingletonMeta):
12
27
  """
13
28
  Langfuse 初始化管理器
@@ -46,14 +61,14 @@ class LangfuseInitializer(metaclass=SingletonMeta):
46
61
  'baseUrl', '')
47
62
  os.environ["LANGFUSE_TRACING_ENVIRONMENT"] = environment
48
63
  os.environ["OTEL_SERVICE_NAME"] = server_name
49
- # 设置 OTLP 追踪导出器超时时间(单位:秒)
50
- os.environ["OTEL_EXPORTER_OTLP_TRACES_TIMEOUT"] = "60"
51
- # 全局 OTLP 超时(覆盖所有信号:追踪/指标/日志)
52
- os.environ["OTEL_EXPORTER_OTLP_TIMEOUT"] = "60"
64
+ # 控制 Langfuse OTLP 导出:超时 30s、每批最多 32 个 span、每 10s 强制 flush
65
+ os.environ["LANGFUSE_TIMEOUT"] = "30"
66
+ os.environ["LANGFUSE_FLUSH_AT"] = "32"
67
+ os.environ["LANGFUSE_FLUSH_INTERVAL"] = "10"
53
68
 
54
69
  self._langfuse_client = get_client()
55
70
 
56
- langfuse_handler = CallbackHandler()
71
+ langfuse_handler = SafeLangfuseCallbackHandler()
57
72
  self._base_callbacks.append(langfuse_handler)
58
73
 
59
74
  SYLogger.info(f"Langfuse 初始化成功 [Service: {server_name}]")