sycommon-python-lib 0.1.52a0__tar.gz → 0.1.57b6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/PKG-INFO +18 -11
  2. sycommon_python_lib-0.1.57b6/pyproject.toml +40 -0
  3. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/Config.py +29 -4
  4. sycommon_python_lib-0.1.57b6/src/sycommon/config/LangfuseConfig.py +15 -0
  5. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/RerankerConfig.py +1 -0
  6. sycommon_python_lib-0.1.57b6/src/sycommon/config/SentryConfig.py +13 -0
  7. sycommon_python_lib-0.1.57b6/src/sycommon/database/async_base_db_service.py +36 -0
  8. sycommon_python_lib-0.1.57b6/src/sycommon/database/async_database_service.py +96 -0
  9. sycommon_python_lib-0.1.57b6/src/sycommon/llm/embedding.py +353 -0
  10. sycommon_python_lib-0.1.57b6/src/sycommon/llm/get_llm.py +37 -0
  11. sycommon_python_lib-0.1.57b6/src/sycommon/llm/llm_logger.py +126 -0
  12. sycommon_python_lib-0.1.57b6/src/sycommon/llm/llm_tokens.py +119 -0
  13. sycommon_python_lib-0.1.57b6/src/sycommon/llm/struct_token.py +192 -0
  14. sycommon_python_lib-0.1.57b6/src/sycommon/llm/sy_langfuse.py +103 -0
  15. sycommon_python_lib-0.1.57b6/src/sycommon/llm/usage_token.py +117 -0
  16. sycommon_python_lib-0.1.57b6/src/sycommon/logging/async_sql_logger.py +65 -0
  17. sycommon_python_lib-0.1.57b6/src/sycommon/logging/kafka_log.py +310 -0
  18. sycommon_python_lib-0.1.57b6/src/sycommon/logging/logger_levels.py +23 -0
  19. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/context.py +2 -0
  20. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/exception.py +10 -16
  21. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/middleware.py +1 -1
  22. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/timeout.py +2 -1
  23. sycommon_python_lib-0.1.57b6/src/sycommon/middleware/traceid.py +294 -0
  24. sycommon_python_lib-0.1.57b6/src/sycommon/notice/uvicorn_monitor.py +200 -0
  25. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_client.py +462 -0
  26. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_pool.py +398 -0
  27. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_service.py +59 -0
  28. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_service_client_manager.py +211 -0
  29. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_service_connection_monitor.py +73 -0
  30. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_service_consumer_manager.py +285 -0
  31. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_service_core.py +117 -0
  32. sycommon_python_lib-0.1.57b6/src/sycommon/rabbitmq/rabbitmq_service_producer_manager.py +238 -0
  33. sycommon_python_lib-0.1.57b6/src/sycommon/sentry/sy_sentry.py +35 -0
  34. sycommon_python_lib-0.1.57b6/src/sycommon/services.py +297 -0
  35. sycommon_python_lib-0.1.57b6/src/sycommon/sse/__init__.py +0 -0
  36. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/__init__.py +0 -0
  37. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/synacos/feign.py +17 -8
  38. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/synacos/feign_client.py +26 -10
  39. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/nacos_client_base.py +121 -0
  40. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/nacos_config_manager.py +107 -0
  41. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/nacos_heartbeat_manager.py +144 -0
  42. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/nacos_service.py +151 -0
  43. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/nacos_service_discovery.py +157 -0
  44. sycommon_python_lib-0.1.57b6/src/sycommon/synacos/nacos_service_registration.py +270 -0
  45. sycommon_python_lib-0.1.57b6/src/sycommon/tools/__init__.py +0 -0
  46. sycommon_python_lib-0.1.57b6/src/sycommon/tools/env.py +62 -0
  47. sycommon_python_lib-0.1.57b6/src/sycommon/tools/merge_headers.py +117 -0
  48. sycommon_python_lib-0.1.57b6/src/sycommon/tools/snowflake.py +248 -0
  49. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon_python_lib.egg-info/PKG-INFO +18 -11
  50. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon_python_lib.egg-info/SOURCES.txt +30 -0
  51. sycommon_python_lib-0.1.57b6/src/sycommon_python_lib.egg-info/requires.txt +22 -0
  52. sycommon_python_lib-0.1.52a0/pyproject.toml +0 -33
  53. sycommon_python_lib-0.1.52a0/src/sycommon/logging/kafka_log.py +0 -544
  54. sycommon_python_lib-0.1.52a0/src/sycommon/middleware/traceid.py +0 -166
  55. sycommon_python_lib-0.1.52a0/src/sycommon/rabbitmq/rabbitmq_client.py +0 -457
  56. sycommon_python_lib-0.1.52a0/src/sycommon/rabbitmq/rabbitmq_pool.py +0 -338
  57. sycommon_python_lib-0.1.52a0/src/sycommon/rabbitmq/rabbitmq_service.py +0 -877
  58. sycommon_python_lib-0.1.52a0/src/sycommon/services.py +0 -258
  59. sycommon_python_lib-0.1.52a0/src/sycommon/synacos/nacos_service.py +0 -855
  60. sycommon_python_lib-0.1.52a0/src/sycommon/tools/snowflake.py +0 -33
  61. sycommon_python_lib-0.1.52a0/src/sycommon_python_lib.egg-info/requires.txt +0 -15
  62. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/README.md +0 -0
  63. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/setup.cfg +0 -0
  64. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/command/cli.py +0 -0
  65. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/__init__.py +0 -0
  66. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/DatabaseConfig.py +0 -0
  67. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/EmbeddingConfig.py +0 -0
  68. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/LLMConfig.py +0 -0
  69. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/MQConfig.py +0 -0
  70. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/config/__init__.py +0 -0
  71. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/database/base_db_service.py +0 -0
  72. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/database/database_service.py +0 -0
  73. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/health/__init__.py +0 -0
  74. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/health/health_check.py +0 -0
  75. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/health/metrics.py +0 -0
  76. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/health/ping.py +0 -0
  77. {sycommon_python_lib-0.1.52a0/src/sycommon/logging → sycommon_python_lib-0.1.57b6/src/sycommon/llm}/__init__.py +0 -0
  78. {sycommon_python_lib-0.1.52a0/src/sycommon/middleware → sycommon_python_lib-0.1.57b6/src/sycommon/logging}/__init__.py +0 -0
  79. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/logging/logger_wrapper.py +0 -0
  80. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/logging/sql_logger.py +0 -0
  81. {sycommon_python_lib-0.1.52a0/src/sycommon/models → sycommon_python_lib-0.1.57b6/src/sycommon/middleware}/__init__.py +0 -0
  82. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/cors.py +0 -0
  83. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/docs.py +0 -0
  84. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/monitor_memory.py +0 -0
  85. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/middleware/mq.py +0 -0
  86. {sycommon_python_lib-0.1.52a0/src/sycommon/sse → sycommon_python_lib-0.1.57b6/src/sycommon/models}/__init__.py +0 -0
  87. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/models/base_http.py +0 -0
  88. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/models/log.py +0 -0
  89. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/models/mqlistener_config.py +0 -0
  90. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/models/mqmsg_model.py +0 -0
  91. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/models/mqsend_config.py +0 -0
  92. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/models/sso_user.py +0 -0
  93. {sycommon_python_lib-0.1.52a0/src/sycommon/synacos → sycommon_python_lib-0.1.57b6/src/sycommon/notice}/__init__.py +0 -0
  94. {sycommon_python_lib-0.1.52a0/src/sycommon/tools → sycommon_python_lib-0.1.57b6/src/sycommon/sentry}/__init__.py +0 -0
  95. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/sse/event.py +0 -0
  96. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/sse/sse.py +0 -0
  97. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/synacos/example.py +0 -0
  98. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/synacos/example2.py +0 -0
  99. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/synacos/param.py +0 -0
  100. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/tools/docs.py +0 -0
  101. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon/tools/timing.py +0 -0
  102. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon_python_lib.egg-info/dependency_links.txt +0 -0
  103. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon_python_lib.egg-info/entry_points.txt +0 -0
  104. {sycommon_python_lib-0.1.52a0 → sycommon_python_lib-0.1.57b6}/src/sycommon_python_lib.egg-info/top_level.txt +0 -0
@@ -1,24 +1,31 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sycommon-python-lib
3
- Version: 0.1.52a0
3
+ Version: 0.1.57b6
4
4
  Summary: Add your description here
5
- Requires-Python: >=3.10
5
+ Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: aio-pika>=9.5.8
8
- Requires-Dist: aiohttp>=3.13.2
8
+ Requires-Dist: aiohttp>=3.13.3
9
+ Requires-Dist: aiomysql>=0.3.2
9
10
  Requires-Dist: decorator>=5.2.1
10
- Requires-Dist: fastapi>=0.121.2
11
- Requires-Dist: kafka-python>=2.2.16
11
+ Requires-Dist: fastapi>=0.128.0
12
+ Requires-Dist: kafka-python>=2.3.0
13
+ Requires-Dist: langchain>=1.2.3
14
+ Requires-Dist: langchain-core>=1.2.7
15
+ Requires-Dist: langchain-openai>=1.1.7
16
+ Requires-Dist: langfuse>=3.11.2
17
+ Requires-Dist: langgraph>=1.0.6
12
18
  Requires-Dist: loguru>=0.7.3
13
19
  Requires-Dist: mysql-connector-python>=9.5.0
14
- Requires-Dist: nacos-sdk-python>=2.0.9
15
- Requires-Dist: pydantic>=2.12.4
20
+ Requires-Dist: nacos-sdk-python<3.0,>=2.0.9
21
+ Requires-Dist: psutil>=7.2.1
22
+ Requires-Dist: pydantic>=2.12.5
16
23
  Requires-Dist: python-dotenv>=1.2.1
17
24
  Requires-Dist: pyyaml>=6.0.3
18
- Requires-Dist: sqlalchemy>=2.0.44
19
- Requires-Dist: starlette>=0.49.3
20
- Requires-Dist: uuid>=1.30
21
- Requires-Dist: uvicorn>=0.38.0
25
+ Requires-Dist: sentry-sdk[fastapi]>=2.49.0
26
+ Requires-Dist: sqlalchemy[asyncio]>=2.0.45
27
+ Requires-Dist: starlette>=0.50.0
28
+ Requires-Dist: uvicorn>=0.40.0
22
29
 
23
30
  # sycommon-python-lib
24
31
 
@@ -0,0 +1,40 @@
1
+ [project]
2
+ name = "sycommon-python-lib"
3
+ version = "0.1.57b6"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "aio-pika>=9.5.8",
9
+ "aiohttp>=3.13.3",
10
+ "aiomysql>=0.3.2",
11
+ "decorator>=5.2.1",
12
+ "fastapi>=0.128.0",
13
+ "kafka-python>=2.3.0",
14
+ "langchain>=1.2.3",
15
+ "langchain-core>=1.2.7",
16
+ "langchain-openai>=1.1.7",
17
+ "langfuse>=3.11.2",
18
+ "langgraph>=1.0.6",
19
+ "loguru>=0.7.3",
20
+ "mysql-connector-python>=9.5.0",
21
+ "nacos-sdk-python>=2.0.9,<3.0",
22
+ "psutil>=7.2.1",
23
+ "pydantic>=2.12.5",
24
+ "python-dotenv>=1.2.1",
25
+ "pyyaml>=6.0.3",
26
+ "sentry-sdk[fastapi]>=2.49.0",
27
+ "sqlalchemy[asyncio]>=2.0.45",
28
+ "starlette>=0.50.0",
29
+ "uvicorn>=0.40.0",
30
+ ]
31
+
32
+ [tool.setuptools]
33
+ packages = {find = {where = ["src"]}}
34
+
35
+ [build-system]
36
+ requires = ["setuptools"]
37
+ build-backend = "setuptools.build_meta"
38
+
39
+ [project.scripts]
40
+ sycommon = "command.cli:main"
@@ -15,14 +15,13 @@ class Config(metaclass=SingletonMeta):
15
15
  with open(config_file, 'r', encoding='utf-8') as f:
16
16
  self.config = yaml.safe_load(f)
17
17
  self.MaxBytes = self.config.get('MaxBytes', 209715200)
18
- self.Timeout = self.config.get('Timeout', 300000)
19
- self.OCR = self.config.get('OCR', None)
20
- self.INVOICE_OCR = self.config.get('INVOICE_OCR', None)
21
- self.UnstructuredAPI = self.config.get('UnstructuredAPI', None)
18
+ self.Timeout = self.config.get('Timeout', 600000)
22
19
  self.MaxRetries = self.config.get('MaxRetries', 3)
23
20
  self.llm_configs = []
24
21
  self.embedding_configs = []
25
22
  self.reranker_configs = []
23
+ self.sentry_configs = []
24
+ self.langfuse_configs = []
26
25
  self._process_config()
27
26
 
28
27
  def get_llm_config(self, model_name):
@@ -43,6 +42,18 @@ class Config(metaclass=SingletonMeta):
43
42
  return llm
44
43
  raise ValueError(f"No configuration found for model: {model_name}")
45
44
 
45
+ def get_sentry_config(self, name):
46
+ for sentry in self.sentry_configs:
47
+ if sentry.get('name') == name:
48
+ return sentry
49
+ raise ValueError(f"No configuration found for server: {name}")
50
+
51
+ def get_langfuse_config(self, name):
52
+ for langfuse in self.langfuse_configs:
53
+ if langfuse.get('name') == name:
54
+ return langfuse
55
+ raise ValueError(f"No configuration found for server: {name}")
56
+
46
57
  def _process_config(self):
47
58
  llm_config_list = self.config.get('LLMConfig', [])
48
59
  for llm_config in llm_config_list:
@@ -71,3 +82,17 @@ class Config(metaclass=SingletonMeta):
71
82
  self.reranker_configs.append(validated_config.model_dump())
72
83
  except ValueError as e:
73
84
  print(f"Invalid LLM configuration: {e}")
85
+
86
+ sentry_config_list = self.config.get('SentryConfig', [])
87
+ for sentry_config in sentry_config_list:
88
+ try:
89
+ from sycommon.config.SentryConfig import SentryConfig
90
+ validated_config = SentryConfig(**sentry_config)
91
+ self.sentry_configs.append(validated_config.model_dump())
92
+ except ValueError as e:
93
+ print(f"Invalid Sentry configuration: {e}")
94
+
95
+ def set_attr(self, share_configs: dict):
96
+ self.config = {**self.config, **
97
+ share_configs.get('llm', {}), **share_configs}
98
+ self._process_config()
@@ -0,0 +1,15 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
+ class LangfuseConfig(BaseModel):
5
+ name: str
6
+ secretKey: str
7
+ publicKey: str
8
+ baseUrl: str
9
+ enable: bool
10
+
11
+ @classmethod
12
+ def from_config(cls, server_name: str):
13
+ from sycommon.config.Config import Config
14
+ langfuse_config = Config().get_langfuse_config(server_name)
15
+ return cls(**langfuse_config)
@@ -5,6 +5,7 @@ class RerankerConfig(BaseModel):
5
5
  model: str
6
6
  provider: str
7
7
  baseUrl: str
8
+ maxTokens: int
8
9
 
9
10
  @classmethod
10
11
  def from_config(cls, model_name: str):
@@ -0,0 +1,13 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
+ class SentryConfig(BaseModel):
5
+ name: str
6
+ dsn: str
7
+ enable: bool
8
+
9
+ @classmethod
10
+ def from_config(cls, server_name: str):
11
+ from sycommon.config.Config import Config
12
+ sentry_config = Config().get_sentry_config(server_name)
13
+ return cls(**sentry_config)
@@ -0,0 +1,36 @@
1
+ from contextlib import asynccontextmanager
2
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
3
+ from sycommon.config.Config import SingletonMeta
4
+ from sycommon.database.async_database_service import AsyncDatabaseService
5
+ from sycommon.logging.kafka_log import SYLogger
6
+
7
+
8
+ class AsyncBaseDBService(metaclass=SingletonMeta):
9
+ """数据库操作基础服务类,封装异步会话管理功能"""
10
+
11
+ def __init__(self):
12
+ # 获取异步引擎 (假设 DatabaseService.engine() 返回的是 AsyncEngine)
13
+ self.engine = AsyncDatabaseService.engine()
14
+
15
+ # 创建异步 Session 工厂
16
+ # class_=AsyncSession 是必须的,用于指定生成的是异步会话
17
+ self.Session = async_sessionmaker(
18
+ bind=self.engine,
19
+ class_=AsyncSession,
20
+ expire_on_commit=False
21
+ )
22
+
23
+ @asynccontextmanager
24
+ async def session(self):
25
+ """
26
+ 异步数据库会话上下文管理器
27
+ 自动处理会话的创建、提交、回滚和关闭
28
+ """
29
+ async with self.Session() as session:
30
+ try:
31
+ yield session
32
+ await session.commit()
33
+ except Exception as e:
34
+ await session.rollback()
35
+ SYLogger.error(f"Database operation failed: {str(e)}")
36
+ raise
@@ -0,0 +1,96 @@
1
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
2
+ from sqlalchemy import text
3
+
4
+ from sycommon.config.Config import SingletonMeta
5
+ from sycommon.config.DatabaseConfig import DatabaseConfig, convert_dict_keys
6
+ from sycommon.logging.kafka_log import SYLogger
7
+ from sycommon.logging.async_sql_logger import AsyncSQLTraceLogger
8
+ from sycommon.synacos.nacos_service import NacosService
9
+
10
+
11
+ class AsyncDatabaseService(metaclass=SingletonMeta):
12
+ _engine = None
13
+
14
+ @staticmethod
15
+ async def setup_database(config: dict, shareConfigKey: str):
16
+ common = NacosService(config).share_configs.get(shareConfigKey, {})
17
+ if common and common.get('spring', {}).get('datasource', None):
18
+ databaseConfig = common.get('spring', {}).get('datasource', None)
19
+ converted_dict = convert_dict_keys(databaseConfig)
20
+ db_config = DatabaseConfig.model_validate(converted_dict)
21
+
22
+ # 初始化 DatabaseConnector (传入配置)
23
+ connector = AsyncDatabaseConnector(db_config)
24
+
25
+ # 赋值 engine
26
+ AsyncDatabaseService._engine = connector.engine
27
+
28
+ # 执行异步测试连接
29
+ if not await connector.test_connection():
30
+ raise Exception("Database connection test failed")
31
+
32
+ @staticmethod
33
+ def engine():
34
+ return AsyncDatabaseService._engine
35
+
36
+
37
+ class AsyncDatabaseConnector(metaclass=SingletonMeta):
38
+ def __init__(self, db_config: DatabaseConfig):
39
+ # 从 DatabaseConfig 中提取数据库连接信息
40
+ self.db_user = db_config.username
41
+ self.db_password = db_config.password
42
+
43
+ # 提取 URL 中的主机、端口和数据库名
44
+ url_parts = db_config.url.split('//')[1].split('/')
45
+ host_port = url_parts[0].split(':')
46
+ self.db_host = host_port[0]
47
+ self.db_port = host_port[1]
48
+ self.db_name = url_parts[1].split('?')[0]
49
+
50
+ # 提取 URL 中的参数
51
+ params_str = url_parts[1].split('?')[1] if len(
52
+ url_parts[1].split('?')) > 1 else ''
53
+ params = {}
54
+ for param in params_str.split('&'):
55
+ if param:
56
+ key, value = param.split('=')
57
+ params[key] = value
58
+
59
+ # 在params中去掉指定的参数
60
+ for key in ['useUnicode', 'characterEncoding', 'serverTimezone', 'zeroDateTimeBehavior']:
61
+ if key in params:
62
+ del params[key]
63
+
64
+ # 构建数据库连接 URL
65
+ # 注意:这里将 mysqlconnector 替换为 aiomysql 以支持异步
66
+ self.db_url = f'mysql+aiomysql://{self.db_user}:{self.db_password}@{self.db_host}:{self.db_port}/{self.db_name}'
67
+
68
+ SYLogger.info(f"Database URL: {self.db_url}")
69
+
70
+ # 优化连接池配置
71
+ # 使用 create_async_engine 替代 create_engine
72
+ self.engine = create_async_engine(
73
+ self.db_url,
74
+ connect_args=params,
75
+ pool_size=10, # 连接池大小
76
+ max_overflow=20, # 最大溢出连接数
77
+ pool_timeout=30, # 连接超时时间(秒)
78
+ pool_recycle=3600, # 连接回收时间(秒)
79
+ pool_pre_ping=True, # 每次获取连接前检查连接是否有效
80
+ echo=False, # 打印 SQL 语句
81
+ )
82
+
83
+ # 注册 SQL 日志拦截器 (注意:SQLTraceLogger 需要支持异步引擎,或者您可能需要调整日志逻辑)
84
+ # 假设 SQLTraceLogger.setup_sql_logging 能够处理 AsyncEngine
85
+ AsyncSQLTraceLogger.setup_sql_logging(self.engine)
86
+
87
+ async def test_connection(self):
88
+ try:
89
+ # 异步上下文管理器
90
+ async with self.engine.connect() as connection:
91
+ # 执行简单查询
92
+ await connection.execute(text("SELECT 1"))
93
+ return True
94
+ except Exception as e:
95
+ SYLogger.error(f"Database connection test failed: {e}")
96
+ return False
@@ -0,0 +1,353 @@
1
+ import asyncio
2
+ import aiohttp
3
+ import atexit
4
+ from typing import Union, List, Optional, Dict
5
+ from sycommon.config.Config import SingletonMeta
6
+ from sycommon.config.EmbeddingConfig import EmbeddingConfig
7
+ from sycommon.config.RerankerConfig import RerankerConfig
8
+ from sycommon.logging.kafka_log import SYLogger
9
+
10
+
11
+ class Embedding(metaclass=SingletonMeta):
12
+ def __init__(self):
13
+ # 1. 并发限制
14
+ self.max_concurrency = 20
15
+ # 保留默认模型名称
16
+ self.default_embedding_model = "bge-large-zh-v1.5"
17
+ self.default_reranker_model = "bge-reranker-large"
18
+
19
+ # 初始化默认模型的基础URL
20
+ self.embeddings_base_url = EmbeddingConfig.from_config(
21
+ self.default_embedding_model).baseUrl
22
+ self.reranker_base_url = RerankerConfig.from_config(
23
+ self.default_reranker_model).baseUrl
24
+
25
+ # [修复] 缓存配置URL,避免高并发下重复读取配置文件
26
+ self._embedding_url_cache: Dict[str, str] = {
27
+ self.default_embedding_model: self.embeddings_base_url
28
+ }
29
+ self._reranker_url_cache: Dict[str, str] = {
30
+ self.default_reranker_model: self.reranker_base_url
31
+ }
32
+
33
+ # 并发信号量
34
+ self.semaphore = asyncio.Semaphore(self.max_concurrency)
35
+ self.default_timeout = aiohttp.ClientTimeout(total=None)
36
+
37
+ # 核心优化:创建全局可复用的ClientSession(连接池复用)
38
+ self.session = None
39
+
40
+ # [修复] 注册退出钩子,确保程序结束时关闭连接池
41
+ atexit.register(self._sync_close_session)
42
+
43
+ async def init_session(self):
44
+ """初始化全局ClientSession(仅创建一次)"""
45
+ if self.session is None or self.session.closed:
46
+ # 配置连接池参数,适配高并发
47
+ connector = aiohttp.TCPConnector(
48
+ limit=self.max_concurrency, # 连接池最大连接数
49
+ limit_per_host=self.max_concurrency, # 每个域名的最大连接数
50
+ ttl_dns_cache=300, # DNS缓存时间
51
+ enable_cleanup_closed=True # 自动清理关闭的连接
52
+ )
53
+ self.session = aiohttp.ClientSession(
54
+ connector=connector,
55
+ timeout=self.default_timeout
56
+ )
57
+
58
+ async def close_session(self):
59
+ """关闭全局Session(程序退出时调用)"""
60
+ if self.session and not self.session.closed:
61
+ await self.session.close()
62
+
63
+ def _sync_close_session(self):
64
+ """同步关闭Session的封装,供atexit调用"""
65
+ try:
66
+ loop = asyncio.get_event_loop()
67
+ if loop.is_running():
68
+ # [修复] 修正缩进,确保 create_task 的异常能被捕获
69
+ try:
70
+ loop.create_task(self.close_session())
71
+ except Exception:
72
+ pass
73
+ else:
74
+ try:
75
+ loop.run_until_complete(self.close_session())
76
+ except Exception:
77
+ pass
78
+ except Exception:
79
+ # 捕获获取 loop 时的异常
80
+ pass
81
+
82
+ def _get_embedding_url(self, model: str) -> str:
83
+ """获取Embedding URL(带缓存)"""
84
+ if model not in self._embedding_url_cache:
85
+ self._embedding_url_cache[model] = EmbeddingConfig.from_config(
86
+ model).baseUrl
87
+ return self._embedding_url_cache[model]
88
+
89
+ def _get_reranker_url(self, model: str) -> str:
90
+ """获取Reranker URL(带缓存)"""
91
+ if model not in self._reranker_url_cache:
92
+ self._reranker_url_cache[model] = RerankerConfig.from_config(
93
+ model).baseUrl
94
+ return self._reranker_url_cache[model]
95
+
96
+ async def _get_embeddings_http_core(
97
+ self,
98
+ input: Union[str, List[str]],
99
+ encoding_format: str = None,
100
+ model: str = None,
101
+ timeout: aiohttp.ClientTimeout = None,
102
+ **kwargs
103
+ ):
104
+ """embedding请求核心逻辑"""
105
+ await self.init_session() # 确保Session已初始化
106
+ async with self.semaphore:
107
+ request_timeout = timeout or self.default_timeout
108
+ target_model = model or self.default_embedding_model
109
+
110
+ # [修复] 使用缓存获取URL
111
+ target_base_url = self._get_embedding_url(target_model)
112
+ url = f"{target_base_url}/v1/embeddings"
113
+
114
+ request_body = {
115
+ "model": target_model,
116
+ "input": input,
117
+ "encoding_format": encoding_format or "float"
118
+ }
119
+ request_body.update(kwargs)
120
+
121
+ # 复用全局Session
122
+ try:
123
+ async with self.session.post(
124
+ url,
125
+ json=request_body,
126
+ timeout=request_timeout
127
+ ) as response:
128
+ if response.status != 200:
129
+ error_detail = await response.text()
130
+ # [日志] 记录详细的HTTP错误响应
131
+ SYLogger.error(
132
+ f"Embedding request HTTP Error. Status: {response.status}, "
133
+ f"Model: {target_model}, URL: {url}. Detail: {error_detail}"
134
+ )
135
+ return None
136
+ return await response.json()
137
+ except (aiohttp.ClientConnectionResetError, asyncio.TimeoutError, aiohttp.ClientError) as e:
138
+ # [日志] 记录网络错误
139
+ SYLogger.error(
140
+ f"Embedding request Network Error. Model: {target_model}, URL: {url}. "
141
+ f"Error: {e.__class__.__name__} - {str(e)}"
142
+ )
143
+ return None
144
+ except Exception as e:
145
+ # 记录其他未预期的异常
146
+ SYLogger.error(
147
+ f"Unexpected error in _get_embeddings_http_core: {str(e)}", exc_info=True)
148
+ return None
149
+
150
+ async def _get_embeddings_http_async(
151
+ self,
152
+ input: Union[str, List[str]],
153
+ encoding_format: str = None,
154
+ model: str = None,
155
+ timeout: aiohttp.ClientTimeout = None, ** kwargs
156
+ ):
157
+ """对外暴露的embedding请求方法"""
158
+ return await self._get_embeddings_http_core(
159
+ input, encoding_format, model, timeout, ** kwargs
160
+ )
161
+
162
+ async def _get_reranker_http_core(
163
+ self,
164
+ documents: List[str],
165
+ query: str,
166
+ top_n: Optional[int] = None,
167
+ model: str = None,
168
+ max_chunks_per_doc: Optional[int] = None,
169
+ return_documents: Optional[bool] = True,
170
+ return_len: Optional[bool] = True,
171
+ timeout: aiohttp.ClientTimeout = None, ** kwargs
172
+ ):
173
+ """reranker请求核心逻辑"""
174
+ await self.init_session() # 确保Session已初始化
175
+ async with self.semaphore:
176
+ request_timeout = timeout or self.default_timeout
177
+ target_model = model or self.default_reranker_model
178
+
179
+ # [修复] 使用缓存获取URL
180
+ target_base_url = self._get_reranker_url(target_model)
181
+ url = f"{target_base_url}/v1/rerank"
182
+
183
+ request_body = {
184
+ "model": target_model,
185
+ "documents": documents,
186
+ "query": query,
187
+ "top_n": top_n or len(documents),
188
+ "max_chunks_per_doc": max_chunks_per_doc,
189
+ "return_documents": return_documents,
190
+ "return_len": return_len,
191
+ }
192
+ request_body.update(kwargs)
193
+
194
+ # 复用全局Session
195
+ try:
196
+ async with self.session.post(
197
+ url,
198
+ json=request_body,
199
+ timeout=request_timeout
200
+ ) as response:
201
+ if response.status != 200:
202
+ error_detail = await response.text()
203
+ # [日志] 记录详细的HTTP错误响应
204
+ SYLogger.error(
205
+ f"Reranker request HTTP Error. Status: {response.status}, "
206
+ f"Model: {target_model}, URL: {url}. Detail: {error_detail}"
207
+ )
208
+ return None
209
+ return await response.json()
210
+ except (aiohttp.ClientConnectionResetError, asyncio.TimeoutError, aiohttp.ClientError) as e:
211
+ # [日志] 记录网络错误
212
+ SYLogger.error(
213
+ f"Reranker request Network Error. Model: {target_model}, URL: {url}. "
214
+ f"Error: {e.__class__.__name__} - {str(e)}"
215
+ )
216
+ return None
217
+ except Exception as e:
218
+ # 记录其他未预期的异常
219
+ SYLogger.error(
220
+ f"Unexpected error in _get_reranker_http_core: {str(e)}", exc_info=True)
221
+ return None
222
+
223
+ async def _get_reranker_http_async(
224
+ self,
225
+ documents: List[str],
226
+ query: str,
227
+ top_n: Optional[int] = None,
228
+ model: str = None,
229
+ max_chunks_per_doc: Optional[int] = None,
230
+ return_documents: Optional[bool] = True,
231
+ return_len: Optional[bool] = True,
232
+ timeout: aiohttp.ClientTimeout = None, ** kwargs
233
+ ):
234
+ """对外暴露的reranker请求方法"""
235
+ return await self._get_reranker_http_core(
236
+ documents, query, top_n, model, max_chunks_per_doc,
237
+ return_documents, return_len, timeout, **kwargs
238
+ )
239
+
240
+ def _get_dimension(self, model: str) -> int:
241
+ """获取模型维度,用于生成兜底零向量"""
242
+ try:
243
+ config = EmbeddingConfig.from_config(model)
244
+ if hasattr(config, 'dimension'):
245
+ return int(config.dimension)
246
+ except Exception:
247
+ pass
248
+ # 默认兜底 1024
249
+ return 1024
250
+
251
+ async def get_embeddings(
252
+ self,
253
+ corpus: List[str],
254
+ model: str = None,
255
+ timeout: Optional[Union[int, float]] = None
256
+ ):
257
+ """
258
+ 获取语料库的嵌入向量,结果顺序与输入语料库顺序一致
259
+
260
+ Args:
261
+ corpus: 待生成嵌入向量的文本列表
262
+ model: 可选,指定使用的embedding模型名称,默认使用bge-large-zh-v1.5
263
+ timeout: 可选,超时时间(秒):
264
+ - 传int/float:表示总超时时间(秒)
265
+ - 不传/None:使用默认永不超时配置
266
+ """
267
+ request_timeout = None
268
+ if timeout is not None:
269
+ if isinstance(timeout, (int, float)):
270
+ request_timeout = aiohttp.ClientTimeout(total=timeout)
271
+ else:
272
+ SYLogger.warning(
273
+ f"Invalid timeout type: {type(timeout)}, must be int/float, use default timeout")
274
+
275
+ actual_model = model or self.default_embedding_model
276
+
277
+ SYLogger.info(
278
+ f"Requesting embeddings for corpus: {len(corpus)} items (model: {actual_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
279
+
280
+ all_vectors = []
281
+
282
+ # [修复] 增加 Chunk 处理逻辑,防止 corpus 过大导致内存溢出或协程过多
283
+ # 每次最多处理 max_concurrency * 2 个请求,避免一次性创建几十万个协程
284
+ batch_size = self.max_concurrency * 2
285
+
286
+ for i in range(0, len(corpus), batch_size):
287
+ batch_texts = corpus[i: i + batch_size]
288
+
289
+ # 给每个异步任务传入模型名称和超时配置
290
+ tasks = [self._get_embeddings_http_async(
291
+ text, model=model, timeout=request_timeout) for text in batch_texts]
292
+ results = await asyncio.gather(*tasks)
293
+
294
+ for result in results:
295
+ if result is None:
296
+ dim = self._get_dimension(actual_model)
297
+
298
+ zero_vector = [0.0] * dim
299
+ all_vectors.append(zero_vector)
300
+ # [日志] 补充日志,明确是补零操作
301
+ SYLogger.warning(
302
+ f"Embedding request failed (returned None), appending zero vector ({dim}D) for model {actual_model}")
303
+ continue
304
+
305
+ # 从返回结果中提取向量
306
+ try:
307
+ for item in result["data"]:
308
+ embedding = item["embedding"]
309
+ all_vectors.append(embedding)
310
+ except (KeyError, TypeError) as e:
311
+ SYLogger.error(f"Failed to parse embedding result: {e}")
312
+ dim = self._get_dimension(actual_model)
313
+ all_vectors.append([0.0] * dim)
314
+
315
+ SYLogger.info(
316
+ f"Embeddings for corpus created: {len(all_vectors)} vectors (model: {actual_model})")
317
+ return all_vectors
318
+
319
+ async def get_reranker(
320
+ self,
321
+ top_results: List[str],
322
+ query: str,
323
+ model: str = None,
324
+ timeout: Optional[Union[int, float]] = None
325
+ ):
326
+ """
327
+ 对搜索结果进行重排序
328
+
329
+ Args:
330
+ top_results: 待重排序的文本列表
331
+ query: 排序参考的查询语句
332
+ model: 可选,指定使用的reranker模型名称,默认使用bge-reranker-large
333
+ timeout: 可选,超时时间(秒):
334
+ - 传int/float:表示总超时时间(秒)
335
+ - 不传/None:使用默认永不超时配置
336
+ """
337
+ request_timeout = None
338
+ if timeout is not None:
339
+ if isinstance(timeout, (int, float)):
340
+ request_timeout = aiohttp.ClientTimeout(total=timeout)
341
+ else:
342
+ SYLogger.warning(
343
+ f"Invalid timeout type: {type(timeout)}, must be int/float, use default timeout")
344
+
345
+ actual_model = model or self.default_reranker_model
346
+ SYLogger.info(
347
+ f"Requesting reranker for top_results: {top_results} (model: {actual_model}, max_concurrency: {self.max_concurrency}, timeout: {timeout or 'None'})")
348
+
349
+ data = await self._get_reranker_http_async(
350
+ top_results, query, model=model, timeout=request_timeout)
351
+ SYLogger.info(
352
+ f"Reranker for top_results completed (model: {actual_model})")
353
+ return data