tamar-model-client 0.1.28__py3-none-any.whl → 0.1.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,12 +35,13 @@ try:
35
35
  from tamar_model_client import TamarModelClient, AsyncTamarModelClient
36
36
  from tamar_model_client.schemas import ModelRequest, UserContext
37
37
  from tamar_model_client.enums import ProviderType, InvokeType, Channel
38
-
38
+
39
39
  # 为了调试,临时启用 SDK 的日志输出
40
40
  # 注意:这会输出 JSON 格式的日志
41
41
  import os
42
+
42
43
  os.environ['TAMAR_MODEL_CLIENT_LOG_LEVEL'] = 'INFO'
43
-
44
+
44
45
  except ImportError as e:
45
46
  logger.error(f"导入模块失败: {e}")
46
47
  sys.exit(1)
@@ -512,6 +513,7 @@ async def test_async_concurrent_requests(num_requests: int = 150):
512
513
  failed_requests = 0
513
514
  request_times: List[float] = []
514
515
  errors: Dict[str, int] = {}
516
+ trace_id = "8885588866668888886666888888866666668888"
515
517
 
516
518
  # 异步锁
517
519
  stats_lock = asyncio.Lock()
@@ -623,13 +625,586 @@ async def test_async_concurrent_requests(num_requests: int = 150):
623
625
  }
624
626
 
625
627
 
628
async def test_async_batch_with_circuit_breaker_v2(num_requests: int = 10):
    """Exercise the circuit breaker by sending individual (non-batch) requests.

    The leading requests deliberately name a non-existent model so that each
    of them fails on its own; per the original author's note, a failing item
    inside a batch request does not trip the breaker.  The remaining requests
    use a valid model.  The circuit-breaker environment variables are
    overridden for the duration of the run and restored afterwards.

    Args:
        num_requests: Number of sequential requests to send (default 10).
    """
    import os

    # Number of leading requests that are built with an invalid model name.
    failing_request_count = 6

    print(f"\n🔥 测试熔断器功能 - 改进版 ({num_requests} 个独立请求)...")

    # Environment overrides that enable the breaker and the HTTP fallback.
    overrides = {
        'MODEL_CLIENT_RESILIENT_ENABLED': 'true',
        'MODEL_CLIENT_HTTP_FALLBACK_URL': 'http://localhost:8000',
        'MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD': '3',  # open after 3 failures
        'MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT': '30',  # recovery timeout value
    }
    # Remember the previous values so the finally-clause can restore them.
    original_env = {var: os.environ.get(var) for var in overrides}

    def read_breaker(client):
        """Return the client's 'circuit_breaker' metrics dict, or None if unavailable."""
        if not getattr(client, 'resilient_enabled', False):
            return None
        metrics = client.get_resilient_metrics()
        if metrics and 'circuit_breaker' in metrics:
            return metrics['circuit_breaker']
        return None

    def is_open(state) -> bool:
        """Case-insensitive check, since the sibling test compares against 'OPEN'."""
        return str(state).lower() == 'open'

    # Counters
    total_requests = 0
    successful_requests = 0
    failed_requests = 0
    circuit_breaker_opened = False
    http_fallback_used = 0
    errors: Dict[str, int] = {}

    try:
        os.environ.update(overrides)

        print(" 环境变量设置:")
        print(f" - MODEL_CLIENT_RESILIENT_ENABLED: {os.environ.get('MODEL_CLIENT_RESILIENT_ENABLED')}")
        print(f" - MODEL_CLIENT_HTTP_FALLBACK_URL: {os.environ.get('MODEL_CLIENT_HTTP_FALLBACK_URL')}")
        # Report the configured threshold instead of a stale hard-coded "1".
        print(f" - 熔断阈值: {os.environ.get('MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD')} 次失败")

        # One shared client for every request so breaker state accumulates.
        async with AsyncTamarModelClient() as client:
            print("\n 熔断器配置:")
            print(f" - 启用状态: {getattr(client, 'resilient_enabled', False)}")
            print(f" - HTTP Fallback URL: {getattr(client, 'http_fallback_url', 'None')}")

            for i in range(num_requests):
                start_time = time.time()

                try:
                    if i < failing_request_count:
                        # Invalid model: expected to fail and feed the breaker.
                        request = ModelRequest(
                            provider=ProviderType.OPENAI,
                            model="invalid-model-to-trigger-error",
                            input=f"测试失败请求 {i + 1}",
                            user_context=UserContext(
                                user_id=f"circuit_test_{i}",
                                org_id="test_org_circuit",
                                client_type="circuit_test"
                            )
                        )
                    else:
                        # Later requests use a valid model.
                        request = ModelRequest(
                            provider=ProviderType.GOOGLE,
                            model="tamar-google-gemini-flash-lite",
                            contents=f"测试请求 {i + 1}: 计算 {i} + {i}",
                            user_context=UserContext(
                                user_id=f"circuit_test_{i}",
                                org_id="test_org_circuit",
                                client_type="circuit_test"
                            ),
                            config={"temperature": 0.1}
                        )

                    print(f"\n 📤 发送请求 {i + 1}/{num_requests}...")
                    await client.invoke(request, timeout=10000)

                except Exception as request_error:
                    duration = time.time() - start_time
                    total_requests += 1
                    failed_requests += 1

                    error_type = type(request_error).__name__
                    errors[error_type] = errors.get(error_type, 0) + 1

                    print(f" ❌ 请求 {i + 1} 失败: {error_type} - {str(request_error)[:100]}")
                    print(f" 耗时: {duration:.2f}秒")

                    # Report breaker state after each failure.
                    try:
                        breaker = read_breaker(client)
                        if breaker is not None:
                            state = breaker['state']
                            failures = breaker['failure_count']

                            if is_open(state) and not circuit_breaker_opened:
                                circuit_breaker_opened = True
                                print(f" 🔻 熔断器已打开!失败次数: {failures}")

                            print(f" 熔断器: {state}, 失败计数: {failures}")
                    except Exception as metrics_error:
                        print(f" 获取熔断器状态失败: {metrics_error}")

                else:
                    duration = time.time() - start_time
                    total_requests += 1
                    successful_requests += 1

                    print(f" ✅ 请求 {i + 1} 成功 - 耗时: {duration:.2f}秒")

                    # Heuristic kept from the original: a success while the
                    # breaker reports "open" is counted as an HTTP-fallback call.
                    try:
                        breaker = read_breaker(client)
                        if breaker is not None and is_open(breaker['state']):
                            http_fallback_used += 1
                            print(" (通过HTTP fallback)")
                    except Exception as metrics_error:
                        # Previously swallowed silently; surface it instead.
                        print(f" 获取熔断器状态失败: {metrics_error}")

                # Short pause between requests.
                await asyncio.sleep(0.2)

            # Final summary (printed while the client is still open).
            print("\n📊 熔断器测试结果:")
            print(f" 总请求数: {total_requests}")
            print(f" 成功请求: {successful_requests}")
            print(f" 失败请求: {failed_requests}")

            print("\n 🔥 熔断器统计:")
            print(f" - 熔断器是否触发: {'是' if circuit_breaker_opened else '否'}")
            print(f" - HTTP fallback使用次数: {http_fallback_used}")

            try:
                final_breaker = read_breaker(client)
                if final_breaker is not None:
                    print(f" - 最终状态: {final_breaker['state']}")
                    print(f" - 总失败次数: {final_breaker['failure_count']}")
            except Exception as metrics_error:
                print(f" - 获取最终状态失败: {metrics_error}")

            if errors:
                print("\n 错误统计:")
                for error_type, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
                    print(f" - {error_type}: {count} 次")

    except Exception as e:
        print(f"❌ 测试失败: {str(e)}")
        import traceback
        traceback.print_exc()

    finally:
        # Restore the environment exactly as it was before the test.
        for var, value in original_env.items():
            if value is None:
                os.environ.pop(var, None)
            else:
                os.environ[var] = value
792
+
793
+
794
async def test_async_batch_with_circuit_breaker(batch_size: int = 10, num_batches: int = 5):
    """Send several batch requests through one client to exercise the circuit breaker.

    A single AsyncTamarModelClient is reused for every batch.  In the first
    two batches every fourth item names an invalid model so that failures
    occur; later batches use valid models only.  The circuit-breaker
    environment variables are overridden for the run and restored afterwards.

    Args:
        batch_size: Number of items in each batch request (default 10).
        num_batches: Number of batch requests to send (default 5).
    """
    import os

    print(f"\n🔥 测试异步批量请求 - 熔断器模式 ({num_batches} 个批量,每批 {batch_size} 个请求)...")

    # Environment overrides that enable the breaker and the HTTP fallback.
    overrides = {
        'MODEL_CLIENT_RESILIENT_ENABLED': 'true',
        'MODEL_CLIENT_HTTP_FALLBACK_URL': 'http://localhost:8000',
        'MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD': '3',  # open after 3 failures
        'MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT': '60',  # recovery timeout value
    }
    # Remember the previous values so the finally-clause can restore them.
    original_env = {var: os.environ.get(var) for var in overrides}

    def percent(part: int, whole: int) -> float:
        """Return part/whole as a percentage, or 0.0 when nothing was counted."""
        return part / whole * 100 if whole else 0.0

    # Counters
    total_batches = 0
    successful_batches = 0
    failed_batches = 0
    circuit_breaker_opened = False
    http_fallback_used = 0
    batch_times: List[float] = []
    errors: Dict[str, int] = {}

    try:
        os.environ.update(overrides)

        # Debug output confirming the overrides took effect.
        print(" 环境变量设置:")
        print(f" - MODEL_CLIENT_RESILIENT_ENABLED: {os.environ.get('MODEL_CLIENT_RESILIENT_ENABLED')}")
        print(f" - MODEL_CLIENT_HTTP_FALLBACK_URL: {os.environ.get('MODEL_CLIENT_HTTP_FALLBACK_URL')}")

        from tamar_model_client.schemas import BatchModelRequest, BatchModelRequestItem

        def build_item(batch_num: int, i: int):
            """Build item ``i`` of batch ``batch_num``, rotating provider/channel by index."""
            request_idx = batch_num * batch_size + i
            variant = request_idx % 4

            if variant == 0:
                # Google Vertex AI
                return BatchModelRequestItem(
                    provider=ProviderType.GOOGLE,
                    channel=Channel.VERTEXAI,
                    invoke_type=InvokeType.GENERATION,
                    model="tamar-google-gemini-flash-lite",
                    contents=f"计算 {request_idx} * 2 的结果",
                    custom_id=f"batch-{batch_num}-google-vertex-{i}",
                    config={"temperature": 0.1}
                )
            if variant == 1:
                # Google AI Studio
                return BatchModelRequestItem(
                    provider=ProviderType.GOOGLE,
                    channel=Channel.AI_STUDIO,
                    invoke_type=InvokeType.GENERATION,
                    model="tamar-google-gemini-flash-lite",
                    contents=f"解释数字 {request_idx} 的含义",
                    custom_id=f"batch-{batch_num}-google-studio-{i}",
                    config={"temperature": 0.2, "maxOutputTokens": 50}
                )
            if variant == 2:
                # Azure OpenAI
                return BatchModelRequestItem(
                    provider=ProviderType.AZURE,
                    invoke_type=InvokeType.CHAT_COMPLETIONS,
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": f"数字 {request_idx} 是奇数还是偶数?"}],
                    custom_id=f"batch-{batch_num}-azure-{i}",
                    config={"temperature": 0.1, "max_tokens": 30}
                )
            if batch_num < 2:
                # First two batches: deliberately invalid model to provoke failures.
                return BatchModelRequestItem(
                    provider=ProviderType.GOOGLE,
                    invoke_type=InvokeType.GENERATION,
                    model="invalid-model-to-trigger-error",
                    contents=f"测试错误 {request_idx}",
                    custom_id=f"batch-{batch_num}-error-{i}",
                )
            # Later batches: valid model again.
            return BatchModelRequestItem(
                provider=ProviderType.GOOGLE,
                invoke_type=InvokeType.GENERATION,
                model="tamar-google-gemini-flash-lite",
                contents=f"Hello from batch {batch_num}, item {i}",
                custom_id=f"batch-{batch_num}-recovery-{i}",
            )

        # One shared client for every batch so breaker state accumulates.
        async with AsyncTamarModelClient() as client:
            resilient = getattr(client, 'resilient_enabled', False)

            print(" 熔断器配置:")
            print(f" - 启用状态: {resilient}")
            print(f" - HTTP Fallback URL: {getattr(client, 'http_fallback_url', 'None')}")
            if resilient:
                try:
                    metrics = client.get_resilient_metrics()
                    if metrics and 'circuit_breaker' in metrics:
                        print(f" - 熔断阈值: {metrics['circuit_breaker'].get('failure_threshold', 'Unknown')} 次失败")
                        print(f" - 熔断恢复时间: {metrics['circuit_breaker'].get('recovery_timeout', 'Unknown')} 秒")
                    else:
                        print(f" - 熔断阈值: {os.environ.get('MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD', '5')} 次失败")
                        print(f" - 熔断恢复时间: {os.environ.get('MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT', '60')} 秒")
                except Exception:
                    # Metrics unavailable: fall back to the environment values.
                    print(f" - 熔断阈值: {os.environ.get('MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD', '5')} 次失败")
                    print(f" - 熔断恢复时间: {os.environ.get('MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT', '60')} 秒")
            else:
                print(" - 熔断器未启用")

            for batch_num in range(num_batches):
                start_time = time.time()

                try:
                    items = [build_item(batch_num, i) for i in range(batch_size)]

                    batch_request = BatchModelRequest(
                        user_context=UserContext(
                            user_id=f"circuit_breaker_test_batch_{batch_num}",
                            org_id="test_org_circuit_breaker",
                            client_type="async_batch_circuit_test"
                        ),
                        items=items
                    )

                    print(f"\n 📦 发送批量请求 {batch_num + 1}/{num_batches}...")
                    batch_response = await client.invoke_batch(
                        batch_request,
                        timeout=300000.0,
                        request_id=f"circuit_breaker_test_{batch_num}"
                    )

                    # Tally inside the try so a malformed response is counted
                    # once as a failed batch, not as success and failure both.
                    responses = list(batch_response.responses)
                    success_count = sum(1 for r in responses if not r.error)
                    error_count = sum(1 for r in responses if r.error)

                except Exception as batch_error:
                    duration = time.time() - start_time
                    batch_times.append(duration)
                    total_batches += 1
                    failed_batches += 1

                    message = str(batch_error)
                    error_type = message.split(':')[0] if ':' in message else message[:50]
                    errors[error_type] = errors.get(error_type, 0) + 1

                    print(f" ❌ 批量请求 {batch_num + 1} 失败: {error_type}")
                    print(f" - 耗时: {duration:.2f} 秒")

                else:
                    duration = time.time() - start_time
                    batch_times.append(duration)
                    total_batches += 1
                    successful_batches += 1

                    print(f" ✅ 批量请求 {batch_num + 1} 完成")
                    print(f" - 耗时: {duration:.2f} 秒")
                    print(f" - 成功: {success_count}/{batch_size}")
                    print(f" - 失败: {error_count}/{batch_size}")

                    # Inspect breaker state after each completed batch.
                    if resilient:
                        try:
                            breaker_status = client.get_resilient_metrics()
                            if breaker_status and 'circuit_breaker' in breaker_status:
                                breaker = breaker_status['circuit_breaker']
                                # Case-insensitive: the sibling test compares against
                                # 'open', so an exact 'OPEN' match may never fire.
                                if str(breaker['state']).lower() == 'open':
                                    if not circuit_breaker_opened:
                                        circuit_breaker_opened = True
                                        print(" 🔻 熔断器已打开!将使用HTTP fallback")
                                    http_fallback_used += 1

                                print(f" - 熔断器状态: {breaker['state']}")
                                print(f" - 失败计数: {breaker['failure_count']}")
                        except Exception as metrics_error:
                            print(f" - 获取熔断器状态失败: {metrics_error}")

                # Short pause between batches (not after the last one).
                if batch_num < num_batches - 1:
                    await asyncio.sleep(0.5)

            # Final summary; percent() guards the num_batches == 0 case.
            print("\n📊 批量请求测试结果 (熔断器模式):")
            print(f" 总批次数: {total_batches}")
            print(f" 成功批次: {successful_batches} ({percent(successful_batches, total_batches):.1f}%)")
            print(f" 失败批次: {failed_batches} ({percent(failed_batches, total_batches):.1f}%)")

            if batch_times:
                avg_batch_time = sum(batch_times) / len(batch_times)
                print("\n 批次耗时统计:")
                print(f" - 平均: {avg_batch_time:.3f} 秒")
                print(f" - 最小: {min(batch_times):.3f} 秒")
                print(f" - 最大: {max(batch_times):.3f} 秒")

            print("\n 🔥 熔断器统计:")
            print(f" - 熔断器是否触发: {'是' if circuit_breaker_opened else '否'}")
            print(f" - HTTP fallback使用次数: {http_fallback_used}")

            # Final breaker metrics.
            if resilient:
                try:
                    final_metrics = client.get_resilient_metrics()
                    if final_metrics and 'circuit_breaker' in final_metrics:
                        print(f" - 最终状态: {final_metrics['circuit_breaker']['state']}")
                        print(f" - 总失败次数: {final_metrics['circuit_breaker']['failure_count']}")
                        print(f" - 失败阈值: {final_metrics['circuit_breaker']['failure_threshold']}")
                        print(f" - 恢复超时: {final_metrics['circuit_breaker']['recovery_timeout']}秒")
                    else:
                        print(" - 无法获取熔断器指标")
                except Exception as metrics_error:
                    print(f" - 获取熔断器指标失败: {metrics_error}")

            if errors:
                print("\n 错误统计:")
                for error_type, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
                    print(f" - {error_type}: {count} 次")

    except Exception as e:
        print(f"❌ 批量测试失败: {str(e)}")
        import traceback
        traceback.print_exc()

    finally:
        # Restore the environment exactly as it was before the test.
        for var, value in original_env.items():
            if value is None:
                os.environ.pop(var, None)
            else:
                os.environ[var] = value
1032
+
1033
+
1034
async def test_async_concurrent_requests_independent_clients(num_requests: int = 150):
    """Run concurrent async requests, each through its own AsyncTamarModelClient.

    Every request opens and closes a fresh client instead of sharing one, so
    the run exercises client setup and teardown under concurrency.  Requests
    rotate between Google Vertex AI, Google AI Studio and Azure OpenAI based
    on ``request_id % 3``.

    Args:
        num_requests: Total number of requests to send (default 150).

    Returns:
        A dict with the keys ``total``, ``successful``, ``failed``,
        ``duration`` (seconds) and ``qps``.
    """
    # Upper bound on in-flight requests; each one creates its own client.
    max_concurrency = 30

    print(f"\n🚀 测试异步并发请求 - 独立客户端模式 ({num_requests} 个请求)...")

    # Counters
    total_requests = 0
    successful_requests = 0
    failed_requests = 0
    request_times: List[float] = []
    errors: Dict[str, int] = {}
    trace_id = "9999999999999999933333999999993333399999"

    # Guards the shared counters updated by the workers.
    stats_lock = asyncio.Lock()

    def percent(part: int, whole: int) -> float:
        """Return part/whole as a percentage, or 0.0 when nothing was counted."""
        return part / whole * 100 if whole else 0.0

    def build_request(request_id: int):
        """Build the request for ``request_id``, choosing the provider by ``request_id % 3``."""
        user_context = UserContext(
            user_id=f"{os.environ.get('INSTANCE_ID', '0')}_independent_{request_id:03d}",
            org_id="test_org_independent",
            client_type="async_independent_test"
        )
        variant = request_id % 3

        if variant == 0:
            # Google Vertex AI
            return ModelRequest(
                provider=ProviderType.GOOGLE,
                channel=Channel.VERTEXAI,
                invoke_type=InvokeType.GENERATION,
                model="tamar-google-gemini-flash-lite",
                contents=f"请计算 {request_id % 10} + {(request_id + 1) % 10} 等于多少?",
                user_context=user_context,
                config={"temperature": 0.1}
            )
        if variant == 1:
            # Google AI Studio
            return ModelRequest(
                provider=ProviderType.GOOGLE,
                channel=Channel.AI_STUDIO,
                invoke_type=InvokeType.GENERATION,
                model="tamar-google-gemini-flash-lite",
                contents=f"什么是人工智能?请简要回答。(请求ID: {request_id})",
                user_context=user_context,
                config={"temperature": 0.3, "maxOutputTokens": 100}
            )
        # Azure OpenAI
        return ModelRequest(
            provider=ProviderType.AZURE,
            invoke_type=InvokeType.CHAT_COMPLETIONS,
            model="gpt-4o-mini",
            messages=[
                {"role": "user", "content": f"请简单解释什么是云计算?(请求{request_id})"}
            ],
            user_context=user_context,
            config={"temperature": 0.2, "max_tokens": 100}
        )

    async def make_single_async_request_with_independent_client(request_id: int) -> Tuple[bool, float, str]:
        """Send one request through a dedicated client.

        Returns:
            (success, duration, error_msg)
        """
        started = time.time()
        try:
            # A fresh client per request; closed when the block exits.
            async with AsyncTamarModelClient() as client:
                request = build_request(request_id)
                await client.invoke(request, timeout=300000.0, request_id=f"{trace_id}_{request_id}")
                return (True, time.time() - started, "")
        except Exception as e:
            return (False, time.time() - started, str(e))

    async def async_independent_worker(request_id: int):
        """Run one request and fold its outcome into the shared counters."""
        nonlocal total_requests, successful_requests, failed_requests

        success, duration, error_msg = await make_single_async_request_with_independent_client(request_id)

        async with stats_lock:
            total_requests += 1
            request_times.append(duration)

            if success:
                successful_requests += 1
            else:
                failed_requests += 1
                # Bucket errors by the text before the first ':' (or a 50-char prefix).
                error_type = error_msg.split(':')[0] if ':' in error_msg else error_msg[:50]
                errors[error_type] = errors.get(error_type, 0) + 1

            # Progress line every 20 completed requests.
            if total_requests % 20 == 0:
                print(
                    f" 进度: {total_requests}/{num_requests} (成功: {successful_requests}, 失败: {failed_requests})")

    start_time = time.time()

    # Cap concurrency because every request opens a new connection.
    semaphore = asyncio.Semaphore(max_concurrency)

    async def limited_independent_worker(request_id: int):
        async with semaphore:
            await async_independent_worker(request_id)

    # Launch every request and wait for all of them to finish.
    await asyncio.gather(*(limited_independent_worker(i) for i in range(num_requests)))

    total_duration = time.time() - start_time

    # Aggregate statistics; all divisions are guarded for num_requests == 0.
    avg_request_time = sum(request_times) / len(request_times) if request_times else 0
    min_request_time = min(request_times) if request_times else 0
    max_request_time = max(request_times) if request_times else 0
    qps = total_requests / total_duration if total_duration > 0 else 0.0

    # Report
    print("\n📊 异步并发测试结果 (独立客户端模式):")
    print(f" 总请求数: {total_requests}")
    print(f" 成功请求: {successful_requests} ({percent(successful_requests, total_requests):.1f}%)")
    print(f" 失败请求: {failed_requests} ({percent(failed_requests, total_requests):.1f}%)")
    print(f" 总耗时: {total_duration:.2f} 秒")
    print(f" 平均QPS: {qps:.2f}")
    print("\n 请求耗时统计:")
    print(f" - 平均: {avg_request_time:.3f} 秒")
    print(f" - 最小: {min_request_time:.3f} 秒")
    print(f" - 最大: {max_request_time:.3f} 秒")

    print("\n 🔍 测试特点:")
    print(" - 每个请求使用独立的AsyncTamarModelClient实例")
    print(" - 不复用连接,测试连接管理能力")
    print(f" - 限制并发数为{max_concurrency}个,避免过多连接")
    print(" - 使用多种Provider (Google Vertex AI, AI Studio, Azure OpenAI)")

    if errors:
        print("\n 错误统计:")
        for error_type, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
            print(f" - {error_type}: {count} 次")

    return {
        "total": total_requests,
        "successful": successful_requests,
        "failed": failed_requests,
        "duration": total_duration,
        "qps": qps
    }
1199
+
1200
+
626
1201
  async def main():
627
1202
  """主函数"""
628
1203
  print("🚀 简化版 Google/Azure 测试")
629
1204
  print("=" * 50)
630
1205
 
631
1206
  try:
632
- # # 同步测试
1207
+ # 同步测试
633
1208
  test_google_ai_studio()
634
1209
  test_google_vertex_ai()
635
1210
  test_azure_openai()
@@ -647,8 +1222,17 @@ async def main():
647
1222
  # 同步并发测试
648
1223
  test_concurrent_requests(2) # 测试150个并发请求
649
1224
 
650
- # 异步并发测试
651
- await test_async_concurrent_requests(2) # 测试150个异步并发请求
1225
+ # # 异步并发测试
1226
+ await test_async_concurrent_requests(2) # 测试50个异步并发请求(复用连接)
1227
+
1228
+ # 异步并发测试 - 独立客户端模式
1229
+ # await test_async_concurrent_requests_independent_clients(30) # 测试30个独立客户端并发请求
1230
+
1231
+ # 异步批量测试 - 熔断器模式(原版)
1232
+ # await test_async_batch_with_circuit_breaker(10, 5) # 测试5个批量请求,每批10个请求
1233
+
1234
+ # 熔断器测试 - 改进版(使用单个请求)
1235
+ # await test_async_batch_with_circuit_breaker_v2(10) # 测试10个独立请求触发熔断
652
1236
 
653
1237
  print("\n✅ 测试完成")
654
1238