vectorvein 0.2.96__tar.gz → 0.2.97__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {vectorvein-0.2.96 → vectorvein-0.2.97}/PKG-INFO +1 -1
  2. {vectorvein-0.2.96 → vectorvein-0.2.97}/pyproject.toml +1 -1
  3. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/anthropic_client.py +6 -18
  4. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/openai_compatible_client.py +237 -79
  5. {vectorvein-0.2.96 → vectorvein-0.2.97}/README.md +0 -0
  6. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/__init__.py +0 -0
  7. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/__init__.py +0 -0
  8. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/client.py +0 -0
  9. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/exceptions.py +0 -0
  10. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/models.py +0 -0
  11. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/__init__.py +0 -0
  12. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  13. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/base_client.py +0 -0
  14. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/ernie_client.py +0 -0
  16. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/gemini_client.py +0 -0
  17. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/groq_client.py +0 -0
  18. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/local_client.py +0 -0
  19. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/minimax_client.py +0 -0
  20. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  21. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  22. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/openai_client.py +0 -0
  23. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/py.typed +0 -0
  24. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  25. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  26. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/utils.py +0 -0
  27. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/xai_client.py +0 -0
  28. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/yi_client.py +0 -0
  29. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  30. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/py.typed +0 -0
  31. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/server/token_server.py +0 -0
  32. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/settings/__init__.py +0 -0
  33. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/settings/py.typed +0 -0
  34. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/__init__.py +0 -0
  35. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/defaults.py +0 -0
  36. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/enums.py +0 -0
  37. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/exception.py +0 -0
  38. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/llm_parameters.py +0 -0
  39. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/py.typed +0 -0
  40. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/settings.py +0 -0
  41. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/utilities/media_processing.py +0 -0
  42. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/utilities/rate_limiter.py +0 -0
  43. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/utilities/retry.py +0 -0
  44. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/edge.py +0 -0
  45. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/node.py +0 -0
  46. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/port.py +0 -0
  47. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/workflow.py +0 -0
  48. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/__init__.py +0 -0
  49. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/audio_generation.py +0 -0
  50. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/control_flows.py +0 -0
  51. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/file_processing.py +0 -0
  52. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/image_generation.py +0 -0
  53. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/llms.py +0 -0
  54. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/media_editing.py +0 -0
  55. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/media_processing.py +0 -0
  56. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/output.py +0 -0
  57. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/relational_db.py +0 -0
  58. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/text_processing.py +0 -0
  59. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/tools.py +0 -0
  60. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/triggers.py +0 -0
  61. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/vector_db.py +0 -0
  62. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/video_generation.py +0 -0
  63. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/web_crawlers.py +0 -0
  64. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/analyse.py +0 -0
  65. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/check.py +0 -0
  66. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/json_to_code.py +0 -0
  67. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/layout.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.2.96
+Version: 0.2.97
 Summary: VectorVein Python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -17,7 +17,7 @@ description = "VectorVein Python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.2.96"
+version = "0.2.97"
 
 [project.license]
 text = "MIT"
@@ -597,9 +597,7 @@ class AnthropicChatClient(BaseChatClient):
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             native_multimodal = self.model_setting.native_multimodal
-            token_counts = get_message_token_counts(
-                messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal
-            )
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
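The reflowed call above sits inside the automatic max_tokens fallback: remaining context is computed from the message/tool token count and clamped to the model's output cap. A minimal sketch of that clamping logic, with hypothetical values standing in for the model settings (none of these numbers come from the package):

```python
# Sketch of the max_tokens fallback above (illustrative values only).
context_length = 200_000   # hypothetical model context window
max_output_tokens = 8_192  # hypothetical per-model output cap
token_counts = 150_000     # tokens already consumed by messages + tools

# Remaining room in the context window, clamped to [1, max_output_tokens].
max_tokens = context_length - token_counts
max_tokens = min(max(max_tokens, 1), max_output_tokens)
print(max_tokens)  # 8192 -- the cap binds; with token_counts=199_999 it would be 1
```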
@@ -707,9 +705,7 @@ class AnthropicChatClient(BaseChatClient):
                             result["raw_content"][i]["input"] = {}
                         try:
                             if result["tool_calls"][0]["function"]["arguments"]:
-                                result["raw_content"][i]["input"] = json.loads(
-                                    result["tool_calls"][0]["function"]["arguments"]
-                                )
+                                result["raw_content"][i]["input"] = json.loads(result["tool_calls"][0]["function"]["arguments"])
                             else:
                                 result["raw_content"][i]["input"] = {}
                         except json.JSONDecodeError:
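This hunk only reflows the defensive parsing of streamed tool-call arguments, but the pattern is worth seeing standalone: argument strings arrive incrementally and may be empty or truncated, so unparseable JSON is treated as "no arguments yet". A sketch of the same fallback; parse_tool_arguments is a hypothetical helper, not part of vectorvein:

```python
import json

def parse_tool_arguments(raw: str) -> dict:
    """Hypothetical helper mirroring the fallback above: streamed tool-call
    argument strings may be empty or cut off mid-stream, so any unparseable
    value is treated as an empty argument dict instead of raising."""
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {}

print(parse_tool_arguments('{"city": "Berlin"}'))  # {'city': 'Berlin'}
print(parse_tool_arguments('{"city": "Ber'))       # {} (truncated chunk)
```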
@@ -727,9 +723,7 @@ class AnthropicChatClient(BaseChatClient):
                         yield ChatCompletionDeltaMessage(**message)
                     elif isinstance(chunk, RawMessageDeltaEvent):
                         result["usage"]["completion_tokens"] = chunk.usage.output_tokens
-                        result["usage"]["total_tokens"] = (
-                            result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
-                        )
+                        result["usage"]["total_tokens"] = result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                         yield ChatCompletionDeltaMessage(
                             usage=Usage(
                                 prompt_tokens=result["usage"]["prompt_tokens"],
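The bookkeeping here is simple: the delta event is used only for its output_tokens, so the running total is rebuilt from the prompt count captured earlier in the stream. A one-liner illustration (values hypothetical):

```python
usage = {"prompt_tokens": 1200, "completion_tokens": 0}
usage["completion_tokens"] = 345  # taken from chunk.usage.output_tokens
usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"]
assert usage["total_tokens"] == 1545
```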
@@ -1211,9 +1205,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             native_multimodal = self.model_setting.native_multimodal
-            token_counts = get_message_token_counts(
-                messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal
-            )
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
@@ -1321,9 +1313,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                             result["raw_content"][i]["input"] = {}
                         try:
                             if result["tool_calls"][0]["function"]["arguments"]:
-                                result["raw_content"][i]["input"] = json.loads(
-                                    result["tool_calls"][0]["function"]["arguments"]
-                                )
+                                result["raw_content"][i]["input"] = json.loads(result["tool_calls"][0]["function"]["arguments"])
                             else:
                                 result["raw_content"][i]["input"] = {}
                         except json.JSONDecodeError:
@@ -1341,9 +1331,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                         yield ChatCompletionDeltaMessage(**message)
                     elif isinstance(chunk, RawMessageDeltaEvent):
                         result["usage"]["completion_tokens"] = chunk.usage.output_tokens
-                        result["usage"]["total_tokens"] = (
-                            result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
-                        )
+                        result["usage"]["total_tokens"] = result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                         yield ChatCompletionDeltaMessage(
                             usage=Usage(
                                 prompt_tokens=result["usage"]["prompt_tokens"],
@@ -381,8 +381,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 usage = None
                 buffer = ""
                 in_reasoning = False
-                current_reasoning = []
-                current_content = []
+                accumulated_reasoning = []
+                accumulated_content = []
 
                 for chunk in stream_response:
                     if chunk.usage and chunk.usage.total_tokens:
@@ -404,63 +404,142 @@ class OpenAICompatibleChatClient(BaseChatClient):
                             for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                                 tool_call.index = index
                                 tool_call.type = "function"  # also a workaround for MiniMax's non-compliant output
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
+
+                        # Even when function calls are supported, <think> tags still need handling
+                        message = chunk.choices[0].delta.model_dump()
+                        delta_content = message.get("content", "")
+                        if delta_content:
+                            buffer += delta_content
+
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
+                            if not in_reasoning:
+                                start_pos = buffer.find("<think>")
+                                if start_pos != -1:
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
+                                    in_reasoning = True
+                                else:
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
+                                    buffer = ""
+                            else:
+                                end_pos = buffer.find("</think>")
+                                if end_pos != -1:
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
+                                    in_reasoning = False
+                                else:
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
+                                    buffer = ""
+
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+                        elif not current_output_content and not current_reasoning_content and not message.get("tool_calls"):
+                            # Skip this message if there is no content and no tool_calls
+                            continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         delta_content = message.get("content", "")
-                        buffer += delta_content or ""
+                        if delta_content:
+                            buffer += delta_content
 
-                        while True:
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
                             if not in_reasoning:
                                 start_pos = buffer.find("<think>")
                                 if start_pos != -1:
-                                    current_content.append(buffer[:start_pos])
-                                    buffer = buffer[start_pos + 7 :]
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
                                     in_reasoning = True
                                 else:
-                                    current_content.append(buffer)
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
                                     buffer = ""
-                                    break
                             else:
                                 end_pos = buffer.find("</think>")
                                 if end_pos != -1:
-                                    current_reasoning.append(buffer[:end_pos])
-                                    buffer = buffer[end_pos + 8 :]
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
                                     in_reasoning = False
                                 else:
-                                    current_reasoning.append(buffer)
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
                                     buffer = ""
-                                    break
-
-                        message["content"] = "".join(current_content)
-                        if current_reasoning:
-                            message["reasoning_content"] = "".join(current_reasoning)
-                        current_content.clear()
-                        current_reasoning.clear()
-
-                        if tools:
-                            full_content += message["content"]
-                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
-                            if tool_call_data:
-                                message["tool_calls"] = tool_call_data["tool_calls"]
-
-                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
-                                message["content"] = ""
-                                result = message
-                                continue
-
-                        yield ChatCompletionDeltaMessage(**message, usage=usage)
 
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+
+                        if tools:
+                            full_content += current_output_content
+                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
+                            if tool_call_data:
+                                message["tool_calls"] = tool_call_data["tool_calls"]
+
+                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
+                                message["content"] = ""
+                                result = message
+                                continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
+
+                # Handle whatever is left in the buffer at the end of the stream
                 if buffer:
                     if in_reasoning:
-                        current_reasoning.append(buffer)
+                        accumulated_reasoning.append(buffer)
                     else:
-                        current_content.append(buffer)
-                final_message = {
-                    "content": "".join(current_content),
-                    "reasoning_content": "".join(current_reasoning) if current_reasoning else None,
-                }
-                yield ChatCompletionDeltaMessage(**final_message, usage=usage)
+                        accumulated_content.append(buffer)
+
+                final_message = {}
+                if accumulated_content:
+                    final_content = "".join(accumulated_content)
+                    if final_content.strip():  # only emit when the content is non-empty
+                        final_message["content"] = final_content
+
+                if accumulated_reasoning:
+                    final_reasoning = "".join(accumulated_reasoning)
+                    if final_reasoning.strip():  # only emit when the reasoning content is non-empty
+                        final_message["reasoning_content"] = final_reasoning
+
+                if final_message:
+                    yield ChatCompletionDeltaMessage(**final_message, usage=usage)
 
                 if result:
                     yield ChatCompletionDeltaMessage(**result, usage=usage)
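The core of this release is the streaming <think>-tag extraction above. It is easier to follow outside the diff, so here is a minimal, self-contained sketch of the same technique; ThinkTagStreamParser is a hypothetical name for illustration and not part of vectorvein's API:

```python
# Compact restatement of the incremental <think> extraction in the hunk above.
class ThinkTagStreamParser:
    def __init__(self) -> None:
        self.buffer = ""
        self.in_reasoning = False

    def feed(self, delta: str) -> tuple[str, str]:
        """Consume one streamed chunk and return (content, reasoning).

        Mirrors the patched code: the buffer is fully drained on every call,
        so an opening tag split across chunk boundaries passes through as
        plain content (the same simplification the diff makes)."""
        self.buffer += delta
        content, reasoning = "", ""
        while self.buffer:
            if not self.in_reasoning:
                start = self.buffer.find("<think>")
                if start != -1:
                    # Text before the tag is regular output; switch to reasoning mode.
                    content += self.buffer[:start]
                    self.buffer = self.buffer[start + len("<think>"):]
                    self.in_reasoning = True
                else:
                    content += self.buffer
                    self.buffer = ""
            else:
                end = self.buffer.find("</think>")
                if end != -1:
                    # Close the reasoning span and return to regular output.
                    reasoning += self.buffer[:end]
                    self.buffer = self.buffer[end + len("</think>"):]
                    self.in_reasoning = False
                else:
                    reasoning += self.buffer
                    self.buffer = ""
        return content, reasoning


parser = ThinkTagStreamParser()
for chunk in ["Hello <think>weigh the options", " carefully</think>", " world"]:
    print(parser.feed(chunk))
# ('Hello ', 'weigh the options')
# ('', ' carefully')
# (' world', '')
```

The state machine is the point: `in_reasoning` persists across chunks, so a reasoning span opened in one delta and closed several deltas later is still routed to `reasoning_content` rather than `content`.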
@@ -820,7 +899,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             messages=messages,
             stream=self.stream,
             temperature=self.temperature,
-            max_tokens=max_tokens,  # somehow Azure OpenAI's stream mode doesn't support max_completion_tokens
+            max_tokens=max_tokens,
             top_p=top_p,
             audio=audio,
             frequency_penalty=frequency_penalty,
@@ -855,8 +934,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 usage = None
                 buffer = ""
                 in_reasoning = False
-                current_reasoning = []
-                current_content = []
+                accumulated_reasoning = []
+                accumulated_content = []
 
                 async for chunk in stream_response:
                     if chunk.usage and chunk.usage.total_tokens:
@@ -878,63 +957,142 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                             for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                                 tool_call.index = index
                                 tool_call.type = "function"
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
+
+                        # Even when function calls are supported, <think> tags still need handling
+                        message = chunk.choices[0].delta.model_dump()
+                        delta_content = message.get("content", "")
+                        if delta_content:
+                            buffer += delta_content
+
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
+                            if not in_reasoning:
+                                start_pos = buffer.find("<think>")
+                                if start_pos != -1:
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
+                                    in_reasoning = True
+                                else:
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
+                                    buffer = ""
+                            else:
+                                end_pos = buffer.find("</think>")
+                                if end_pos != -1:
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
+                                    in_reasoning = False
+                                else:
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
+                                    buffer = ""
+
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+                        elif not current_output_content and not current_reasoning_content and not message.get("tool_calls"):
+                            # Skip this message if there is no content and no tool_calls
+                            continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         delta_content = message.get("content", "")
-                        buffer += delta_content or ""
+                        if delta_content:
+                            buffer += delta_content
 
-                        while True:
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
                             if not in_reasoning:
                                 start_pos = buffer.find("<think>")
                                 if start_pos != -1:
-                                    current_content.append(buffer[:start_pos])
-                                    buffer = buffer[start_pos + 7 :]
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
                                     in_reasoning = True
                                 else:
-                                    current_content.append(buffer)
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
                                     buffer = ""
-                                    break
                             else:
                                 end_pos = buffer.find("</think>")
                                 if end_pos != -1:
-                                    current_reasoning.append(buffer[:end_pos])
-                                    buffer = buffer[end_pos + 8 :]
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
                                     in_reasoning = False
                                 else:
-                                    current_reasoning.append(buffer)
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
                                     buffer = ""
-                                    break
-
-                        message["content"] = "".join(current_content)
-                        if current_reasoning:
-                            message["reasoning_content"] = "".join(current_reasoning)
-                        current_content.clear()
-                        current_reasoning.clear()
-
-                        if tools:
-                            full_content += message["content"]
-                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
-                            if tool_call_data:
-                                message["tool_calls"] = tool_call_data["tool_calls"]
-
-                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
-                                message["content"] = ""
-                                result = message
-                                continue
-
-                        yield ChatCompletionDeltaMessage(**message, usage=usage)
 
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+
+                        if tools:
+                            full_content += current_output_content
+                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
+                            if tool_call_data:
+                                message["tool_calls"] = tool_call_data["tool_calls"]
+
+                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
+                                message["content"] = ""
+                                result = message
+                                continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
+
+                # Handle whatever is left in the buffer at the end of the stream
                 if buffer:
                     if in_reasoning:
-                        current_reasoning.append(buffer)
+                        accumulated_reasoning.append(buffer)
                     else:
-                        current_content.append(buffer)
-                final_message = {
-                    "content": "".join(current_content),
-                    "reasoning_content": "".join(current_reasoning) if current_reasoning else None,
-                }
-                yield ChatCompletionDeltaMessage(**final_message, usage=usage)
+                        accumulated_content.append(buffer)
+
+                final_message = {}
+                if accumulated_content:
+                    final_content = "".join(accumulated_content)
+                    if final_content.strip():  # only emit when the content is non-empty
+                        final_message["content"] = final_content
+
+                if accumulated_reasoning:
+                    final_reasoning = "".join(accumulated_reasoning)
+                    if final_reasoning.strip():  # only emit when the reasoning content is non-empty
+                        final_message["reasoning_content"] = final_reasoning
+
+                if final_message:
+                    yield ChatCompletionDeltaMessage(**final_message, usage=usage)
 
                 if result:
                     yield ChatCompletionDeltaMessage(**result, usage=usage)
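The async client mirrors the sync flow exactly, yielding deltas that carry either content or reasoning_content (plus usage). A hedged sketch of how a caller might consume such a stream; the fake_stream generator and SimpleNamespace objects below are stand-ins for illustration, not vectorvein's API:

```python
import asyncio
from types import SimpleNamespace

# Stand-in for the async delta stream produced above; SimpleNamespace mimics
# the shape of the yielded ChatCompletionDeltaMessage objects.
async def fake_stream():
    yield SimpleNamespace(content="", reasoning_content="weighing options...")
    yield SimpleNamespace(content="Final answer.", reasoning_content=None)

async def collect(stream):
    """Route each delta into the reasoning or answer channel."""
    reasoning, answer = [], []
    async for delta in stream:
        if getattr(delta, "reasoning_content", None):
            reasoning.append(delta.reasoning_content)
        if getattr(delta, "content", None):
            answer.append(delta.content)
    return "".join(reasoning), "".join(answer)

print(asyncio.run(collect(fake_stream())))
# ('weighing options...', 'Final answer.')
```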