staran 0.6.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- staran/__init__.py +10 -0
- staran/tools/__init__.py +5 -5
- staran-1.0.0.dist-info/METADATA +301 -0
- staran-1.0.0.dist-info/RECORD +8 -0
- staran/banks/__init__.py +0 -30
- staran/banks/xinjiang_icbc/__init__.py +0 -90
- staran/engines/__init__.py +0 -65
- staran/engines/base.py +0 -255
- staran/engines/hive.py +0 -163
- staran/engines/spark.py +0 -252
- staran/engines/turing.py +0 -439
- staran/examples/__init__.py +0 -8
- staran/examples/aum_longtail.py +0 -250
- staran/examples/aum_longtail_old.py +0 -487
- staran/features/__init__.py +0 -59
- staran/features/engines.py +0 -284
- staran/features/generator.py +0 -603
- staran/features/manager.py +0 -155
- staran/features/schema.py +0 -193
- staran/models/__init__.py +0 -72
- staran/models/bank_configs.py +0 -269
- staran/models/config.py +0 -271
- staran/models/daifa_models.py +0 -361
- staran/models/registry.py +0 -281
- staran/models/target.py +0 -321
- staran/schemas/__init__.py +0 -27
- staran/schemas/aum/__init__.py +0 -210
- staran/schemas/document_generator.py +0 -350
- staran/tools/document_generator.py +0 -350
- staran-0.6.0.dist-info/METADATA +0 -564
- staran-0.6.0.dist-info/RECORD +0 -33
- {staran-0.6.0.dist-info → staran-1.0.0.dist-info}/WHEEL +0 -0
- {staran-0.6.0.dist-info → staran-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {staran-0.6.0.dist-info → staran-1.0.0.dist-info}/top_level.txt +0 -0
staran/features/manager.py
DELETED
@@ -1,155 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
"""
|
5
|
-
特征管理器
|
6
|
-
负责特征工程的核心管理功能,基于新的引擎架构
|
7
|
-
"""
|
8
|
-
|
9
|
-
from typing import Optional, Dict, Any, List, Callable
|
10
|
-
from datetime import datetime
|
11
|
-
from ..engines import BaseEngine, create_engine, DatabaseType
|
12
|
-
|
13
|
-
|
14
|
-
class FeatureManager:
    """
    Facade over a database engine used for feature engineering.

    Every public method forwards to the engine object built in
    ``__init__``; this class adds no logic of its own.
    """

    def __init__(self, database_name: str, engine_type: str = "spark",
                 sql_executor: Optional[Callable] = None):
        """
        Build the underlying engine and remember the connection settings.

        Args:
            database_name: Name of the database the engine works against.
            engine_type: Engine kind; the original documentation lists
                'spark', 'hive' and 'turing'.
            sql_executor: Optional callable used to run SQL. According to
                the original author it only applies to non-turing engines.
        """
        self.database_name = database_name
        self.engine_type = engine_type

        # The engine is the single collaborator every method delegates to.
        engine_options = {
            "engine_type": engine_type,
            "database_name": database_name,
            "sql_executor": sql_executor,
        }
        self.engine = create_engine(**engine_options)

    # ------------------------------------------------------------------
    # Methods delegated to the engine
    # ------------------------------------------------------------------
    def execute_sql(self, sql: str, description: str = "") -> Any:
        """Run a SQL statement through the engine and return its result."""
        engine = self.engine
        return engine.execute_sql(sql, description)

    def get_full_table_name(self, table_name: str) -> str:
        """Return the engine's fully qualified name for ``table_name``."""
        engine = self.engine
        return engine.get_full_table_name(table_name)

    def generate_table_name(self, base_name: str, year: int, month: int,
                            suffix: str = "raw") -> str:
        """
        Ask the engine for a standardized table name.

        The original documentation gives the format as
        ``{base_name}_{yyyy}_{MM}_{suffix}``; the actual formatting is
        done by the engine.
        """
        engine = self.engine
        return engine.generate_table_name(base_name, year, month, suffix)

    def create_table(self, table_name: str, select_sql: str,
                     execute: bool = False, **kwargs) -> Dict[str, Any]:
        """Create a table from ``select_sql``; extra kwargs go to the engine."""
        engine = self.engine
        return engine.create_table(table_name, select_sql, execute, **kwargs)

    def drop_table(self, table_name: str, execute: bool = False) -> Dict[str, Any]:
        """Drop ``table_name`` via the engine and return the engine's result."""
        engine = self.engine
        return engine.drop_table(table_name, execute)

    def download_table_data(self, table_name: str, output_path: str,
                            **kwargs) -> Dict[str, Any]:
        """Download the contents of a table to ``output_path``."""
        engine = self.engine
        return engine.download_table_data(table_name, output_path, **kwargs)

    def download_query_result(self, sql: str, output_path: str,
                              **kwargs) -> Dict[str, Any]:
        """Download the result of a query to ``output_path``."""
        engine = self.engine
        return engine.download_query_result(sql, output_path, **kwargs)

    def get_execution_history(self) -> List[Dict]:
        """Return the engine's SQL execution history."""
        engine = self.engine
        return engine.get_execution_history()

    def clear_history(self):
        """Clear the engine's execution history (returns nothing)."""
        engine = self.engine
        engine.clear_history()

    def __str__(self):
        return "FeatureManager(engine={})".format(self.engine)
|
85
|
-
|
86
|
-
|
87
|
-
class FeatureTableManager:
    """
    Feature table manager.

    Creates and drops feature tables through a ``FeatureManager`` and keeps
    an in-memory list of the tables this instance successfully created.
    """

    def __init__(self, feature_manager: FeatureManager):
        """
        Set up the table manager.

        Args:
            feature_manager: Feature manager instance used for all
                table operations.
        """
        self.feature_manager = feature_manager
        self.created_tables = []

    def create_feature_table(self, base_name: str, year: int, month: int,
                             version: int, sql: str, execute: bool = False,
                             **kwargs) -> str:
        """
        Create a feature table.

        Args:
            base_name: Base table name.
            year: Year used in the generated table name.
            month: Month used in the generated table name.
            version: Version number. NOTE(review): accepted but not used
                when building the table name - confirm whether intended.
            sql: SQL used to create the table.
            execute: Whether to execute immediately.
            **kwargs: Extra arguments forwarded to the engine.

        Returns:
            The generated table name (returned whether or not the
            creation was executed or succeeded).
        """
        manager = self.feature_manager
        table_name = manager.generate_table_name(base_name, year, month)
        outcome = manager.create_table(table_name, sql, execute, **kwargs)

        # Only remember tables that were really created successfully.
        created = execute and outcome.get('status') == 'success'
        if created:
            self.created_tables.append(table_name)

        return table_name

    def drop_feature_table(self, table_name: str, execute: bool = False) -> str:
        """
        Drop a feature table.

        Args:
            table_name: Table name.
            execute: Whether to execute immediately.

        Returns:
            The value stored under ``'sql'`` in the drop result, or an
            empty string when that key is absent.
        """
        outcome = self.feature_manager.drop_table(table_name, execute)

        # Forget the table only after a confirmed, executed drop.
        dropped = execute and outcome.get('status') == 'success'
        if dropped and table_name in self.created_tables:
            self.created_tables.remove(table_name)

        return outcome.get('sql', '')

    def get_created_tables(self) -> List[str]:
        """Return a copy of the list of tables created by this instance."""
        tracked = self.created_tables
        return tracked.copy()

    def table_exists(self, table_name: str) -> bool:
        """
        Report whether ``table_name`` is in the tracked list.

        This is a simple in-memory check; it does not query the database.
        """
        tracked = self.created_tables
        return table_name in tracked
|
staran/features/schema.py
DELETED
@@ -1,193 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
"""
|
5
|
-
表结构定义模块
|
6
|
-
定义数据库表的字段、类型和分析特性
|
7
|
-
"""
|
8
|
-
|
9
|
-
from enum import Enum
|
10
|
-
from typing import Dict, List, Optional, Union
|
11
|
-
from dataclasses import dataclass
|
12
|
-
|
13
|
-
|
14
|
-
class FieldType(Enum):
    """Enumeration of supported column data types."""
    STRING = "string"
    INTEGER = "int"
    BIGINT = "bigint"
    DECIMAL = "decimal"
    DOUBLE = "double"
    FLOAT = "float"
    DATE = "date"
    TIMESTAMP = "timestamp"
    BOOLEAN = "boolean"


@dataclass
class Field:
    """
    Definition of a single table column.

    ``aggregatable`` may be left unspecified (``None``); it is then inferred
    from ``field_type``: numeric types default to aggregatable, every other
    type does not. An explicit ``True``/``False`` passed by the caller is
    always respected. After construction ``aggregatable`` is always a bool.
    """
    name: str
    field_type: FieldType
    is_primary_key: bool = False
    is_date_field: bool = False
    # None means "not specified by the caller"; resolved in __post_init__.
    aggregatable: Optional[bool] = None
    nullable: bool = True
    comment: str = ""

    def __post_init__(self):
        """Resolve an unspecified ``aggregatable`` flag from the field type."""
        if self.aggregatable is None:
            # Numeric types are aggregatable by default.
            numeric_types = (
                FieldType.INTEGER,
                FieldType.BIGINT,
                FieldType.DECIMAL,
                FieldType.DOUBLE,
                FieldType.FLOAT,
            )
            self.aggregatable = self.field_type in numeric_types

    def set_aggregatable(self, aggregatable: bool):
        """
        Explicitly set whether the field is aggregatable.

        Args:
            aggregatable: New value of the flag.

        Returns:
            self, so calls can be chained.
        """
        self.aggregatable = aggregatable
        # Kept for backward compatibility: records that the flag was set
        # explicitly rather than inferred.
        self._aggregatable_set = True
        return self
|
51
|
-
|
52
|
-
|
53
|
-
class TableSchema:
    """Describes a table: its fields, primary key, date field and flags."""

    def __init__(self, table_name: str, comment: str = ""):
        """
        Initialize an empty table schema.

        Args:
            table_name: Table name.
            comment: Table comment.
        """
        self.table_name = table_name
        self.comment = comment
        self.fields: Dict[str, Field] = {}
        self.primary_key: Optional[str] = None
        self.date_field: Optional[str] = None
        self.is_monthly_unique: bool = False

    @staticmethod
    def _resolve_type(field_type: Union[str, FieldType]) -> FieldType:
        """Turn a type given as a string into a FieldType (case-insensitive)."""
        if isinstance(field_type, str):
            return FieldType(field_type.lower())
        return field_type

    def add_field(self, name: str, field_type: Union[str, FieldType],
                  aggregatable: bool = None, nullable: bool = True,
                  comment: str = "") -> 'TableSchema':
        """
        Add a regular field.

        Args:
            name: Field name.
            field_type: Field type, as a FieldType or its string value.
            aggregatable: Whether the field is aggregatable; when None the
                Field decides on its own.
            nullable: Whether the field may be null.
            comment: Field comment.

        Returns:
            self, to support method chaining.
        """
        new_field = Field(
            name=name,
            field_type=self._resolve_type(field_type),
            nullable=nullable,
            comment=comment,
        )

        # Only override the Field's own choice when the caller gave one.
        if aggregatable is not None:
            new_field.set_aggregatable(aggregatable)

        self.fields[name] = new_field
        return self

    def add_primary_key(self, name: str, field_type: Union[str, FieldType],
                        comment: str = "主键") -> 'TableSchema':
        """Add the primary-key field (non-nullable, never aggregatable)."""
        key_field = Field(
            name=name,
            field_type=self._resolve_type(field_type),
            is_primary_key=True,
            nullable=False,
            comment=comment,
        )
        key_field.set_aggregatable(False)

        self.fields[name] = key_field
        self.primary_key = name
        return self

    def add_date_field(self, name: str, field_type: Union[str, FieldType] = FieldType.DATE,
                       comment: str = "日期字段") -> 'TableSchema':
        """Add the date field (non-nullable, never aggregatable)."""
        date_column = Field(
            name=name,
            field_type=self._resolve_type(field_type),
            is_date_field=True,
            nullable=False,
            comment=comment,
        )
        date_column.set_aggregatable(False)

        self.fields[name] = date_column
        self.date_field = name
        return self

    def set_monthly_unique(self, is_unique: bool = True) -> 'TableSchema':
        """Set the flag marking the data as unique per person per month."""
        self.is_monthly_unique = is_unique
        return self

    def get_aggregatable_fields(self) -> List[Field]:
        """Return the fields flagged as aggregatable, in insertion order."""
        selected = []
        for candidate in self.fields.values():
            if candidate.aggregatable:
                selected.append(candidate)
        return selected

    def get_non_aggregatable_fields(self) -> List[Field]:
        """
        Return the non-aggregatable fields (used for raw copy).

        The primary key and the date field are excluded.
        """
        selected = []
        for candidate in self.fields.values():
            if candidate.aggregatable or candidate.is_primary_key or candidate.is_date_field:
                continue
            selected.append(candidate)
        return selected

    def validate(self) -> bool:
        """
        Validate the schema.

        Returns:
            True when the schema is valid.

        Raises:
            ValueError: If the primary key or date field is missing, or
                names a field that is not in ``fields``.
        """
        key_name = self.primary_key
        date_name = self.date_field

        if not key_name:
            raise ValueError("表必须定义主键")
        if not date_name:
            raise ValueError("表必须定义日期字段")
        if key_name not in self.fields:
            raise ValueError(f"主键字段 {self.primary_key} 不存在")
        if date_name not in self.fields:
            raise ValueError(f"日期字段 {self.date_field} 不存在")

        return True

    def __str__(self) -> str:
        """Return a multi-line, human-readable description of the schema."""
        header = [f"Table: {self.table_name}"]
        if self.comment:
            header.append(f"Comment: {self.comment}")
        header += [
            f"Primary Key: {self.primary_key}",
            f"Date Field: {self.date_field}",
            f"Monthly Unique: {self.is_monthly_unique}",
            "Fields:",
        ]

        rows = []
        for field in self.fields.values():
            # Markers are emitted in a fixed order: PK, DATE, AGG.
            markers = (
                ("[PK]", field.is_primary_key),
                ("[DATE]", field.is_date_field),
                ("[AGG]", field.aggregatable),
            )
            flag_str = "".join(tag for tag, enabled in markers if enabled)
            rows.append(f" {field.name}: {field.field_type.value} {flag_str}")

        return "\n".join(header + rows)
|
staran/models/__init__.py
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
staran.models v0.6.0 - 新疆工行代发长尾客户模型管理
|
3
|
-
|
4
|
-
专门针对新疆工行代发长尾客户的两个核心模型:
|
5
|
-
1. 代发长尾客户提升3k预测模型 (daifa_longtail_upgrade_3k)
|
6
|
-
2. 代发长尾客户防流失1.5k预测模型 (daifa_longtail_churn_1_5k)
|
7
|
-
|
8
|
-
主要功能:
|
9
|
-
- 模型配置管理
|
10
|
-
- SQL驱动的目标变量定义
|
11
|
-
- 模型注册和版本控制
|
12
|
-
- 新疆工行特定配置
|
13
|
-
"""
|
14
|
-
|
15
|
-
from .config import ModelConfig, create_model_config
|
16
|
-
from .target import TargetDefinition, create_target_definition
|
17
|
-
from .registry import ModelRegistry, register_model, save_model_registry
|
18
|
-
from .daifa_models import (
|
19
|
-
create_daifa_longtail_upgrade_model,
|
20
|
-
create_daifa_longtail_churn_model,
|
21
|
-
get_available_daifa_models,
|
22
|
-
create_both_daifa_models
|
23
|
-
)
|
24
|
-
|
25
|
-
# 便捷函数
|
26
|
-
def create_xinjiang_icbc_models(output_dir: str = "./xinjiang_models") -> dict:
    """
    Create the two payroll ("daifa") long-tail customer models for Xinjiang ICBC.

    Convenience wrapper: forwards ``output_dir`` to
    ``create_both_daifa_models`` and returns its result unchanged.
    """
    models = create_both_daifa_models(output_dir)
    return models
|
29
|
-
|
30
|
-
def list_available_models() -> list:
    """
    List all available payroll ("daifa") long-tail customer models.

    Convenience wrapper around ``get_available_daifa_models``.
    """
    available = get_available_daifa_models()
    return available
|
33
|
-
|
34
|
-
def get_model_summary() -> dict:
    """
    Return a static overview of the models shipped in this package.

    Returns:
        A freshly built dict with the keys ``version``, ``bank``,
        ``business_domain`` and ``models`` (a list with one entry per model).
    """
    model_type = "binary_classification"

    upgrade_model = {
        "name": "daifa_longtail_upgrade_3k",
        "description": "预测下个月代发长尾客户资产提升3k的概率",
        "target_amount": 3000,
        "model_type": model_type,
    }
    churn_model = {
        "name": "daifa_longtail_churn_1_5k",
        "description": "预测下个月代发长尾客户流失1.5k资产的风险",
        "target_amount": 1500,
        "model_type": model_type,
    }

    summary = {
        "version": "0.6.0",
        "bank": "新疆工行",
        "business_domain": "代发长尾客户",
        "models": [upgrade_model, churn_model],
    }
    return summary
|
55
|
-
|
56
|
-
# Public API of the package, one name per line.
__all__ = [
    # Core components
    'ModelConfig',
    'TargetDefinition',
    'ModelRegistry',
    # Factory functions
    'create_model_config',
    'create_target_definition',
    'register_model',
    # Payroll ("daifa") long-tail models
    'create_daifa_longtail_upgrade_model',
    'create_daifa_longtail_churn_model',
    'create_both_daifa_models',
    'get_available_daifa_models',
    # Convenience functions
    'create_xinjiang_icbc_models',
    'list_available_models',
    'get_model_summary',
    'save_model_registry',
]

__version__ = "0.6.0"
|
staran/models/bank_configs.py
DELETED
@@ -1,269 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
银行特定配置模块
|
3
|
-
|
4
|
-
为不同银行提供定制化的配置和业务规则
|
5
|
-
"""
|
6
|
-
|
7
|
-
from enum import Enum
|
8
|
-
from typing import Dict, Any, List, Optional
|
9
|
-
from dataclasses import dataclass, field
|
10
|
-
|
11
|
-
|
12
|
-
class BankCode(Enum):
    """Enumeration of bank codes."""
    # Industrial and Commercial Bank of China
    ICBC = "icbc"
    # China Construction Bank
    CCB = "ccb"
    # Bank of China
    BOC = "boc"
    # Agricultural Bank of China
    ABC = "abc"
    # China Merchants Bank
    CMB = "cmb"
    # Generic (bank-agnostic) configuration
    GENERIC = "generic"
|
20
|
-
|
21
|
-
|
22
|
-
@dataclass
class BankConfig:
    """
    Per-bank configuration bundle.

    Groups identification data with several free-form dictionaries
    (database, table/field name mappings, business, compliance,
    data-processing, deployment and feature-engineering settings).
    Every dictionary defaults to a fresh empty dict per instance.
    """
    # --- Basic information ---
    bank_code: str  # bank code
    bank_name: str  # bank name
    region: str = "cn"  # region code

    # --- Database configuration ---
    database_config: Dict[str, Any] = field(default_factory=dict)

    # --- Table name mapping (table names may differ between banks) ---
    table_mappings: Dict[str, str] = field(default_factory=dict)

    # --- Field name mapping, keyed by table (field names may differ) ---
    field_mappings: Dict[str, Dict[str, str]] = field(default_factory=dict)

    # --- Business rules ---
    business_rules: Dict[str, Any] = field(default_factory=dict)

    # --- Compliance requirements ---
    compliance_rules: Dict[str, Any] = field(default_factory=dict)

    # --- Data processing rules ---
    data_processing_rules: Dict[str, Any] = field(default_factory=dict)

    # --- Model deployment configuration ---
    deployment_config: Dict[str, Any] = field(default_factory=dict)

    # --- Feature engineering configuration ---
    feature_engineering_config: Dict[str, Any] = field(default_factory=dict)

    def get_table_name(self, standard_table: str) -> str:
        """Map a standard table name to the bank's own; unmapped names pass through."""
        mappings = self.table_mappings
        return mappings.get(standard_table, standard_table)

    def get_field_name(self, table: str, standard_field: str) -> str:
        """Map a standard field name for ``table``; unmapped names pass through."""
        per_table = self.field_mappings.get(table, {})
        return per_table.get(standard_field, standard_field)

    def get_business_rule(self, rule_name: str, default=None):
        """Return the business rule ``rule_name``, or ``default`` if not set."""
        rules = self.business_rules
        return rules.get(rule_name, default)

    def validate_compliance(self, operation: str) -> bool:
        """
        Tell whether ``operation`` is allowed by the compliance rules.

        Returns the ``'enabled'`` entry of the operation's rule set. An
        operation with no rule set, or without an ``'enabled'`` entry, is
        treated as allowed (True). More specific checks are not implemented.
        """
        rule_set = self.compliance_rules.get(operation, {})
        return rule_set.get('enabled', True)
|
72
|
-
|
73
|
-
|
74
|
-
# 银行配置注册表
|
75
|
-
_BANK_CONFIGS: Dict[str, BankConfig] = {}
|
76
|
-
|
77
|
-
|
78
|
-
def register_bank_config(config: BankConfig):
    """
    Register a bank configuration in the module-level registry.

    The configuration is stored under ``config.bank_code``, replacing any
    entry already stored under that code, and a confirmation line is
    printed to standard output.
    """
    registry_key = config.bank_code
    _BANK_CONFIGS[registry_key] = config
    message = f"✅ 银行配置 {config.bank_code} ({config.bank_name}) 注册成功"
    print(message)
|
82
|
-
|
83
|
-
|
84
|
-
def get_bank_config(bank_code: str) -> Optional[BankConfig]:
    """Return the configuration registered under ``bank_code``, or None."""
    found = _BANK_CONFIGS.get(bank_code)
    return found
|
87
|
-
|
88
|
-
|
89
|
-
def list_bank_configs() -> List[Dict[str, str]]:
    """
    List all registered bank configurations.

    Returns:
        One dict per registered configuration, holding its ``bank_code``,
        ``bank_name`` and ``region``, in registry order.
    """
    summaries = []
    for registered in _BANK_CONFIGS.values():
        summaries.append({
            'bank_code': registered.bank_code,
            'bank_name': registered.bank_name,
            'region': registered.region,
        })
    return summaries
|
99
|
-
|
100
|
-
|
101
|
-
# 预定义银行配置
|
102
|
-
def create_icbc_config() -> BankConfig:
    """
    Build the predefined configuration for ICBC (bank code ``"icbc"``).

    Each call builds new dictionaries, so the returned object can be
    modified without affecting other callers.
    """
    database = {
        "default_database": "dwegdata03000",
        "connection_pool_size": 10,
        "query_timeout": 300,
    }

    # Standard table name -> ICBC table name.
    tables = {
        "behavior_table": "bi_hlwj_dfcw_f1_f4_wy",
        "asset_avg_table": "bi_hlwj_zi_chan_avg_wy",
        "asset_config_table": "bi_hlwj_zi_chang_month_total_zb",
        "monthly_stat_table": "bi_hlwj_realy_month_stat_wy",
    }

    # Standard field name -> ICBC field name, per table.
    fields_by_table = {
        "behavior_table": {
            "customer_id": "party_id",
            "date_field": "data_dt",
        },
    }

    business = {
        "data_retention_days": 90,
        "min_sample_size": 1000,
        "max_features": 500,
        "risk_threshold": 0.8,
        "aum_threshold": 100000,
        "longtail_definition": {
            "asset_threshold": 50000,
            "activity_threshold": 0.3,
        },
    }

    compliance = {
        "data_export": {
            "enabled": True,
            "approval_required": True,
            "encryption_required": True,
        },
        "model_deployment": {
            "enabled": True,
            "testing_required": True,
            "documentation_required": True,
        },
        "feature_selection": {
            "enabled": True,
            "sensitive_data_allowed": False,
            "audit_trail_required": True,
        },
    }

    data_processing = {
        "missing_value_strategy": "median",
        "outlier_detection": True,
        "outlier_threshold": 3.0,
        "feature_scaling": "standard",
        "categorical_encoding": "one_hot",
    }

    deployment = {
        "platform": "turing",
        "environment": "production",
        "monitoring_enabled": True,
        "auto_scaling": True,
        "backup_required": True,
    }

    feature_engineering = {
        "time_windows": ["1_month", "3_months", "6_months", "1_year"],
        "aggregation_functions": ["sum", "avg", "max", "min", "std"],
        "interaction_features": True,
        "polynomial_features": False,
        "target_encoding": True,
    }

    return BankConfig(
        bank_code="icbc",
        bank_name="中国工商银行",
        region="cn",
        database_config=database,
        table_mappings=tables,
        field_mappings=fields_by_table,
        business_rules=business,
        compliance_rules=compliance,
        data_processing_rules=data_processing,
        deployment_config=deployment,
        feature_engineering_config=feature_engineering,
    )
|
183
|
-
|
184
|
-
|
185
|
-
def create_generic_config() -> BankConfig:
    """
    Build the predefined generic configuration (bank code ``"generic"``).

    Field mappings, deployment and feature-engineering settings are not
    supplied, so they keep the empty defaults of ``BankConfig``.
    """
    database = {
        "default_database": "default_db",
        "connection_pool_size": 5,
        "query_timeout": 180,
    }

    tables = {
        "behavior_table": "customer_behavior",
        "asset_avg_table": "customer_assets",
        "asset_config_table": "asset_config",
        "monthly_stat_table": "monthly_stats",
    }

    business = {
        "data_retention_days": 30,
        "min_sample_size": 100,
        "max_features": 100,
    }

    compliance = {
        "data_export": {"enabled": True},
        "model_deployment": {"enabled": True},
    }

    data_processing = {
        "missing_value_strategy": "mean",
        "outlier_detection": False,
        "feature_scaling": "none",
    }

    return BankConfig(
        bank_code="generic",
        bank_name="通用银行配置",
        region="generic",
        database_config=database,
        table_mappings=tables,
        business_rules=business,
        compliance_rules=compliance,
        data_processing_rules=data_processing,
    )
|
222
|
-
|
223
|
-
|
224
|
-
# 初始化默认银行配置
|
225
|
-
def initialize_default_configs():
    """Register the built-in configurations: ICBC first, then the generic one."""
    for build_config in (create_icbc_config, create_generic_config):
        register_bank_config(build_config())
|
232
|
-
|
233
|
-
|
234
|
-
# 自动初始化
|
235
|
-
initialize_default_configs()
|
236
|
-
|
237
|
-
|
238
|
-
# 新疆工行特定配置
|
239
|
-
def create_xinjiang_icbc_config() -> BankConfig:
    """
    Build the configuration for Xinjiang ICBC (bank code ``"xinjiang_icbc"``).

    Starts from a fresh ICBC configuration, overrides the identification
    fields, and adds Xinjiang-specific business and data-processing rules
    on top of the inherited ones.
    """
    xinjiang = create_icbc_config()

    # Re-label the base ICBC configuration.
    xinjiang.bank_code = "xinjiang_icbc"
    xinjiang.bank_name = "新疆工商银行"
    xinjiang.region = "xinjiang"

    # Xinjiang-specific business rules.
    xinjiang.business_rules.update(
        regional_compliance=True,
        minority_customer_support=True,
        language_support=["zh", "ug"],  # Chinese and Uyghur
        timezone="Asia/Urumqi",
        currency_support=["CNY"],
        cross_border_transaction=True,
    )

    # Xinjiang-specific data processing rules.
    xinjiang.data_processing_rules.update(
        character_encoding="utf-8",
        regional_holidays=True,
        time_zone_conversion=True,
    )

    return xinjiang
|
266
|
-
|
267
|
-
|
268
|
-
# 注册新疆工行配置
|
269
|
-
register_bank_config(create_xinjiang_icbc_config())
|