staran 0.4.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {staran-0.4.2/staran.egg-info → staran-0.6.0}/PKG-INFO +6 -6
  2. {staran-0.4.2 → staran-0.6.0}/README.md +5 -5
  3. {staran-0.4.2 → staran-0.6.0}/setup.py +4 -2
  4. staran-0.6.0/staran/__init__.py +0 -0
  5. staran-0.6.0/staran/banks/__init__.py +30 -0
  6. staran-0.6.0/staran/banks/xinjiang_icbc/__init__.py +90 -0
  7. staran-0.6.0/staran/models/__init__.py +72 -0
  8. staran-0.6.0/staran/models/config.py +271 -0
  9. staran-0.6.0/staran/models/daifa_models.py +361 -0
  10. staran-0.6.0/staran/models/registry.py +281 -0
  11. staran-0.6.0/staran/models/target.py +321 -0
  12. staran-0.6.0/staran/schemas/__init__.py +27 -0
  13. staran-0.6.0/staran/schemas/aum/__init__.py +210 -0
  14. {staran-0.4.2 → staran-0.6.0/staran.egg-info}/PKG-INFO +6 -6
  15. {staran-0.4.2 → staran-0.6.0}/staran.egg-info/SOURCES.txt +9 -5
  16. staran-0.4.2/staran/__init__.py +0 -261
  17. staran-0.4.2/staran/examples/__init__.py +0 -8
  18. staran-0.4.2/staran/examples/aum_longtail.py +0 -250
  19. staran-0.4.2/staran/examples/aum_longtail_old.py +0 -487
  20. staran-0.4.2/staran/schemas/__init__.py +0 -28
  21. staran-0.4.2/staran/schemas/aum/__init__.py +0 -306
  22. {staran-0.4.2 → staran-0.6.0}/LICENSE +0 -0
  23. {staran-0.4.2 → staran-0.6.0}/setup.cfg +0 -0
  24. {staran-0.4.2 → staran-0.6.0}/staran/engines/__init__.py +0 -0
  25. {staran-0.4.2 → staran-0.6.0}/staran/engines/base.py +0 -0
  26. {staran-0.4.2 → staran-0.6.0}/staran/engines/hive.py +0 -0
  27. {staran-0.4.2 → staran-0.6.0}/staran/engines/spark.py +0 -0
  28. {staran-0.4.2 → staran-0.6.0}/staran/engines/turing.py +0 -0
  29. {staran-0.4.2 → staran-0.6.0}/staran/features/__init__.py +0 -0
  30. {staran-0.4.2 → staran-0.6.0}/staran/features/engines.py +0 -0
  31. {staran-0.4.2 → staran-0.6.0}/staran/features/generator.py +0 -0
  32. {staran-0.4.2 → staran-0.6.0}/staran/features/manager.py +0 -0
  33. {staran-0.4.2 → staran-0.6.0}/staran/features/schema.py +0 -0
  34. {staran-0.4.2 → staran-0.6.0}/staran/tools/__init__.py +0 -0
  35. {staran-0.4.2 → staran-0.6.0}/staran/tools/date.py +0 -0
  36. {staran-0.4.2/staran/schemas → staran-0.6.0/staran/tools}/document_generator.py +0 -0
  37. {staran-0.4.2 → staran-0.6.0}/staran.egg-info/dependency_links.txt +0 -0
  38. {staran-0.4.2 → staran-0.6.0}/staran.egg-info/requires.txt +0 -0
  39. {staran-0.4.2 → staran-0.6.0}/staran.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: staran
3
- Version: 0.4.2
3
+ Version: 0.6.0
4
4
  Summary: staran - 高性能Python工具库
5
5
  Home-page: https://github.com/starlxa/staran
6
6
  Author: StarAn
@@ -24,7 +24,7 @@ Dynamic: requires-dist
24
24
  Dynamic: requires-python
25
25
  Dynamic: summary
26
26
 
27
- # Star## ✨ v0.4.2 新特性
27
+ # Star## ✨ v0.6.0 新特性
28
28
 
29
29
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
30
30
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -42,7 +42,7 @@ Dynamic: summary
42
42
 
43
43
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
44
44
 
45
- ## ✨ v0.4.2 新特性
45
+ ## ✨ v0.6.0 新特性
46
46
 
47
47
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
48
48
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
361
361
 
362
362
  ```
363
363
  staran/
364
- ├── __init__.py # 主包入口,v0.4.2功能导出
364
+ ├── __init__.py # 主包入口,v0.6.0功能导出
365
365
  ├── schemas/ # 🆕 表结构定义与文档生成模块
366
366
  │ ├── __init__.py # Schema模块入口
367
367
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -385,7 +385,7 @@ staran/
385
385
  │ ├── __init__.py # 工具模块
386
386
  │ └── date.py # Date类实现
387
387
  ├── setup.py # 安装配置
388
- ├── README.md # 本文档 v0.4.2
388
+ ├── README.md # 本文档 v0.6.0
389
389
  └── quick-upload.sh # 快速部署脚本
390
390
  ```
391
391
 
@@ -561,4 +561,4 @@ MIT License
561
561
 
562
562
  ---
563
563
 
564
- **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
564
+ **Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -1,4 +1,4 @@
1
- # Star## ✨ v0.4.2 新特性
1
+ # Star## ✨ v0.6.0 新特性
2
2
 
3
3
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
4
4
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -16,7 +16,7 @@
16
16
 
17
17
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
18
18
 
19
- ## ✨ v0.4.2 新特性
19
+ ## ✨ v0.6.0 新特性
20
20
 
21
21
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
22
22
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -335,7 +335,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
335
335
 
336
336
  ```
337
337
  staran/
338
- ├── __init__.py # 主包入口,v0.4.2功能导出
338
+ ├── __init__.py # 主包入口,v0.6.0功能导出
339
339
  ├── schemas/ # 🆕 表结构定义与文档生成模块
340
340
  │ ├── __init__.py # Schema模块入口
341
341
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -359,7 +359,7 @@ staran/
359
359
  │ ├── __init__.py # 工具模块
360
360
  │ └── date.py # Date类实现
361
361
  ├── setup.py # 安装配置
362
- ├── README.md # 本文档 v0.4.2
362
+ ├── README.md # 本文档 v0.6.0
363
363
  └── quick-upload.sh # 快速部署脚本
364
364
  ```
365
365
 
@@ -535,4 +535,4 @@ MIT License
535
535
 
536
536
  ---
537
537
 
538
- **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
538
+ **Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="staran",
5
- version="0.4.2",
5
+ version="0.6.0",
6
6
  description="staran - 高性能Python工具库",
7
7
  long_description=open("README.md", encoding="utf-8").read(),
8
8
  long_description_content_type="text/markdown",
@@ -14,9 +14,11 @@ setup(
14
14
  "staran.tools",
15
15
  "staran.features",
16
16
  "staran.engines",
17
- "staran.examples",
18
17
  "staran.schemas",
19
18
  "staran.schemas.aum",
19
+ "staran.models",
20
+ "staran.banks",
21
+ "staran.banks.xinjiang_icbc",
20
22
  ],
21
23
  install_requires=[
22
24
  "datetime",
File without changes
@@ -0,0 +1,30 @@
1
+ """
2
+ staran.banks - 银行配置模块
3
+
4
+ 该模块包含不同银行的特定配置,包括:
5
+ - 数据库连接配置
6
+ - 表结构定义
7
+ - 业务规则设置
8
+ - 模型配置
9
+
10
+ 支持的银行:
11
+ - xinjiang_icbc: 新疆工行配置
12
+
13
+ 版本: 0.6.0
14
+ """
15
+
16
+ from .xinjiang_icbc import (
17
+ XinjiangICBCConfig,
18
+ get_xinjiang_icbc_tables,
19
+ get_xinjiang_icbc_models,
20
+ xinjiang_icbc_config
21
+ )
22
+
23
+ __all__ = [
24
+ 'XinjiangICBCConfig',
25
+ 'xinjiang_icbc_config',
26
+ 'get_xinjiang_icbc_tables',
27
+ 'get_xinjiang_icbc_models'
28
+ ]
29
+
30
+ __version__ = "0.6.0"
@@ -0,0 +1,90 @@
1
+ """
2
+ 新疆工行银行配置模块
3
+
4
+ 专门针对新疆工行代发长尾客户的配置:
5
+ - 数据库表结构定义(代发长尾客户专用)
6
+ - 业务规则配置
7
+ - 模型配置(提升模型和防流失模型)
8
+
9
+ 数据库: xinjiang_icbc_daifa_longtail
10
+ 业务范围: 代发长尾客户
11
+ """
12
+
13
+ from dataclasses import dataclass
14
+ from typing import Dict, List, Optional
15
+ from datetime import datetime
16
+
17
+
18
+ @dataclass
19
+ class XinjiangICBCConfig:
20
+ """新疆工行配置类"""
21
+
22
+ # 数据库配置
23
+ database_name: str = "xinjiang_icbc_daifa_longtail"
24
+ schema_name: str = "daifa_longtail"
25
+
26
+ # 业务配置
27
+ business_domain: str = "代发长尾客户"
28
+ customer_segment: str = "代发长尾"
29
+
30
+ # 模型配置
31
+ available_models: List[str] = None
32
+
33
+ # 业务规则
34
+ longtail_asset_min: float = 10000 # 长尾客户最小资产
35
+ longtail_asset_max: float = 100000 # 长尾客户最大资产
36
+ upgrade_target: float = 3000 # 提升目标金额
37
+ churn_threshold: float = 1500 # 流失阈值金额
38
+
39
+ def __post_init__(self):
40
+ if self.available_models is None:
41
+ self.available_models = [
42
+ "daifa_longtail_upgrade_3k", # 代发长尾提升3k模型
43
+ "daifa_longtail_churn_1_5k" # 代发长尾防流失1.5k模型
44
+ ]
45
+
46
+
47
+ def get_xinjiang_icbc_tables() -> Dict[str, str]:
48
+ """获取新疆工行代发长尾客户表配置"""
49
+ return {
50
+ # 代发长尾客户行为表
51
+ "daifa_longtail_behavior": "xinjiang_icbc_daifa_hlwj_dfcw_f1_f4_wy",
52
+
53
+ # 代发长尾客户资产平均表
54
+ "daifa_longtail_asset_avg": "xinjiang_icbc_daifa_hlwj_zi_chan_avg_wy",
55
+
56
+ # 代发长尾客户资产配置表
57
+ "daifa_longtail_asset_config": "xinjiang_icbc_daifa_hlwj_zi_chan_config_wy",
58
+
59
+ # 代发长尾客户月度统计表
60
+ "daifa_longtail_monthly_stat": "xinjiang_icbc_daifa_hlwj_monthly_stat_wy"
61
+ }
62
+
63
+
64
+ def get_xinjiang_icbc_models() -> Dict[str, Dict]:
65
+ """获取新疆工行代发长尾客户模型配置"""
66
+ return {
67
+ "daifa_longtail_upgrade_3k": {
68
+ "name": "代发长尾客户提升3k预测模型",
69
+ "description": "预测下个月代发长尾客户资产提升3000元的概率",
70
+ "target": "upgrade_3k_next_month",
71
+ "model_type": "binary_classification",
72
+ "business_objective": "识别有潜力提升资产的代发长尾客户",
73
+ "target_threshold": 3000,
74
+ "prediction_window": "1_month"
75
+ },
76
+
77
+ "daifa_longtail_churn_1_5k": {
78
+ "name": "代发长尾客户防流失1.5k预测模型",
79
+ "description": "预测下个月代发长尾客户流失1500元资产的风险",
80
+ "target": "churn_1_5k_next_month",
81
+ "model_type": "binary_classification",
82
+ "business_objective": "识别有流失风险的代发长尾客户",
83
+ "target_threshold": 1500,
84
+ "prediction_window": "1_month"
85
+ }
86
+ }
87
+
88
+
89
+ # 创建默认配置实例
90
+ xinjiang_icbc_config = XinjiangICBCConfig()
@@ -0,0 +1,72 @@
1
+ """
2
+ staran.models v0.6.0 - 新疆工行代发长尾客户模型管理
3
+
4
+ 专门针对新疆工行代发长尾客户的两个核心模型:
5
+ 1. 代发长尾客户提升3k预测模型 (daifa_longtail_upgrade_3k)
6
+ 2. 代发长尾客户防流失1.5k预测模型 (daifa_longtail_churn_1_5k)
7
+
8
+ 主要功能:
9
+ - 模型配置管理
10
+ - SQL驱动的目标变量定义
11
+ - 模型注册和版本控制
12
+ - 新疆工行特定配置
13
+ """
14
+
15
+ from .config import ModelConfig, create_model_config
16
+ from .target import TargetDefinition, create_target_definition
17
+ from .registry import ModelRegistry, register_model, save_model_registry
18
+ from .daifa_models import (
19
+ create_daifa_longtail_upgrade_model,
20
+ create_daifa_longtail_churn_model,
21
+ get_available_daifa_models,
22
+ create_both_daifa_models
23
+ )
24
+
25
+ # 便捷函数
26
+ def create_xinjiang_icbc_models(output_dir: str = "./xinjiang_models") -> dict:
27
+ """为新疆工行创建两个代发长尾客户模型"""
28
+ return create_both_daifa_models(output_dir)
29
+
30
+ def list_available_models() -> list:
31
+ """列出所有可用的代发长尾客户模型"""
32
+ return get_available_daifa_models()
33
+
34
+ def get_model_summary() -> dict:
35
+ """获取模型概述信息"""
36
+ return {
37
+ "version": "0.6.0",
38
+ "bank": "新疆工行",
39
+ "business_domain": "代发长尾客户",
40
+ "models": [
41
+ {
42
+ "name": "daifa_longtail_upgrade_3k",
43
+ "description": "预测下个月代发长尾客户资产提升3k的概率",
44
+ "target_amount": 3000,
45
+ "model_type": "binary_classification"
46
+ },
47
+ {
48
+ "name": "daifa_longtail_churn_1_5k",
49
+ "description": "预测下个月代发长尾客户流失1.5k资产的风险",
50
+ "target_amount": 1500,
51
+ "model_type": "binary_classification"
52
+ }
53
+ ]
54
+ }
55
+
56
+ __all__ = [
57
+ # 核心组件
58
+ 'ModelConfig', 'TargetDefinition', 'ModelRegistry',
59
+
60
+ # 创建函数
61
+ 'create_model_config', 'create_target_definition', 'register_model',
62
+
63
+ # 代发长尾模型
64
+ 'create_daifa_longtail_upgrade_model', 'create_daifa_longtail_churn_model',
65
+ 'create_both_daifa_models', 'get_available_daifa_models',
66
+
67
+ # 便捷函数
68
+ 'create_xinjiang_icbc_models', 'list_available_models', 'get_model_summary',
69
+ 'save_model_registry'
70
+ ]
71
+
72
+ __version__ = "0.6.0"
@@ -0,0 +1,271 @@
1
+ """
2
+ 模型配置管理模块
3
+
4
+ 定义模型的核心配置信息,包括模型类型、参数、特征配置等
5
+ """
6
+
7
+ from enum import Enum
8
+ from typing import Dict, Any, List, Optional
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+
12
+
13
+ class ModelType(Enum):
14
+ """模型类型枚举"""
15
+ CLASSIFICATION = "classification"
16
+ REGRESSION = "regression"
17
+ CLUSTERING = "clustering"
18
+ TIME_SERIES = "time_series"
19
+ ANOMALY_DETECTION = "anomaly_detection"
20
+ RECOMMENDATION = "recommendation"
21
+
22
+
23
+ class ModelAlgorithm(Enum):
24
+ """模型算法枚举"""
25
+ # 分类算法
26
+ LOGISTIC_REGRESSION = "logistic_regression"
27
+ RANDOM_FOREST = "random_forest"
28
+ GRADIENT_BOOSTING = "gradient_boosting"
29
+ SVM = "svm"
30
+ NEURAL_NETWORK = "neural_network"
31
+
32
+ # 回归算法
33
+ LINEAR_REGRESSION = "linear_regression"
34
+ RIDGE_REGRESSION = "ridge_regression"
35
+ LASSO_REGRESSION = "lasso_regression"
36
+
37
+ # 聚类算法
38
+ KMEANS = "kmeans"
39
+ DBSCAN = "dbscan"
40
+ HIERARCHICAL = "hierarchical"
41
+
42
+ # 时间序列
43
+ ARIMA = "arima"
44
+ LSTM = "lstm"
45
+ PROPHET = "prophet"
46
+
47
+
48
+ @dataclass
49
+ class FeatureConfig:
50
+ """特征配置"""
51
+ schema_name: str # 使用的schema名称 (如 'aum')
52
+ table_types: List[str] # 使用的表类型列表 (如 ['behavior', 'asset_avg'])
53
+ feature_selection: bool = True # 是否启用特征选择
54
+ feature_engineering: bool = True # 是否启用特征工程
55
+ scaling: bool = True # 是否启用特征缩放
56
+ encoding: Dict[str, str] = field(default_factory=dict) # 编码配置
57
+
58
+
59
+ @dataclass
60
+ class ModelConfig:
61
+ """模型配置类"""
62
+ # 基本信息
63
+ name: str # 模型名称
64
+ model_type: ModelType # 模型类型
65
+ algorithm: ModelAlgorithm # 使用的算法
66
+ version: str = "1.0.0" # 模型版本
67
+
68
+ # 特征配置
69
+ feature_config: FeatureConfig = None
70
+
71
+ # 模型参数
72
+ hyperparameters: Dict[str, Any] = field(default_factory=dict)
73
+
74
+ # 训练配置
75
+ training_config: Dict[str, Any] = field(default_factory=lambda: {
76
+ 'test_size': 0.2,
77
+ 'random_state': 42,
78
+ 'cross_validation': True,
79
+ 'cv_folds': 5
80
+ })
81
+
82
+ # 评估配置
83
+ evaluation_metrics: List[str] = field(default_factory=list)
84
+
85
+ # 银行特定配置
86
+ bank_code: str = "generic" # 银行代码
87
+ business_domain: str = "generic" # 业务领域
88
+
89
+ # 元数据
90
+ description: str = "" # 模型描述
91
+ created_at: datetime = field(default_factory=datetime.now)
92
+ created_by: str = "system" # 创建者
93
+ tags: List[str] = field(default_factory=list)
94
+
95
+ # 部署配置
96
+ deployment_config: Dict[str, Any] = field(default_factory=dict)
97
+
98
+ def __post_init__(self):
99
+ """初始化后处理"""
100
+ if self.feature_config is None:
101
+ self.feature_config = FeatureConfig(
102
+ schema_name="generic",
103
+ table_types=["base"]
104
+ )
105
+
106
+ # 根据模型类型设置默认评估指标
107
+ if not self.evaluation_metrics:
108
+ self.evaluation_metrics = self._get_default_metrics()
109
+
110
+ def _get_default_metrics(self) -> List[str]:
111
+ """根据模型类型获取默认评估指标"""
112
+ if self.model_type == ModelType.CLASSIFICATION:
113
+ return ['accuracy', 'precision', 'recall', 'f1_score', 'auc']
114
+ elif self.model_type == ModelType.REGRESSION:
115
+ return ['mae', 'mse', 'rmse', 'r2_score']
116
+ elif self.model_type == ModelType.CLUSTERING:
117
+ return ['silhouette_score', 'calinski_harabasz_score']
118
+ else:
119
+ return ['custom_metric']
120
+
121
+ def to_dict(self) -> Dict[str, Any]:
122
+ """转换为字典格式"""
123
+ return {
124
+ 'name': self.name,
125
+ 'model_type': self.model_type.value,
126
+ 'algorithm': self.algorithm.value,
127
+ 'version': self.version,
128
+ 'feature_config': {
129
+ 'schema_name': self.feature_config.schema_name,
130
+ 'table_types': self.feature_config.table_types,
131
+ 'feature_selection': self.feature_config.feature_selection,
132
+ 'feature_engineering': self.feature_config.feature_engineering,
133
+ 'scaling': self.feature_config.scaling,
134
+ 'encoding': self.feature_config.encoding
135
+ },
136
+ 'hyperparameters': self.hyperparameters,
137
+ 'training_config': self.training_config,
138
+ 'evaluation_metrics': self.evaluation_metrics,
139
+ 'bank_code': self.bank_code,
140
+ 'business_domain': self.business_domain,
141
+ 'description': self.description,
142
+ 'created_at': self.created_at.isoformat(),
143
+ 'created_by': self.created_by,
144
+ 'tags': self.tags,
145
+ 'deployment_config': self.deployment_config
146
+ }
147
+
148
+ @classmethod
149
+ def from_dict(cls, data: Dict[str, Any]) -> 'ModelConfig':
150
+ """从字典创建ModelConfig实例"""
151
+ feature_config_data = data.get('feature_config', {})
152
+ feature_config = FeatureConfig(
153
+ schema_name=feature_config_data.get('schema_name', 'generic'),
154
+ table_types=feature_config_data.get('table_types', ['base']),
155
+ feature_selection=feature_config_data.get('feature_selection', True),
156
+ feature_engineering=feature_config_data.get('feature_engineering', True),
157
+ scaling=feature_config_data.get('scaling', True),
158
+ encoding=feature_config_data.get('encoding', {})
159
+ )
160
+
161
+ return cls(
162
+ name=data['name'],
163
+ model_type=ModelType(data['model_type']),
164
+ algorithm=ModelAlgorithm(data['algorithm']),
165
+ version=data.get('version', '1.0.0'),
166
+ feature_config=feature_config,
167
+ hyperparameters=data.get('hyperparameters', {}),
168
+ training_config=data.get('training_config', {}),
169
+ evaluation_metrics=data.get('evaluation_metrics', []),
170
+ bank_code=data.get('bank_code', 'generic'),
171
+ business_domain=data.get('business_domain', 'generic'),
172
+ description=data.get('description', ''),
173
+ created_by=data.get('created_by', 'system'),
174
+ tags=data.get('tags', []),
175
+ deployment_config=data.get('deployment_config', {})
176
+ )
177
+
178
+
179
+ def create_model_config(
180
+ name: str,
181
+ model_type: str,
182
+ algorithm: str,
183
+ schema_name: str = "generic",
184
+ table_types: List[str] = None,
185
+ bank_code: str = "generic",
186
+ **kwargs
187
+ ) -> ModelConfig:
188
+ """
189
+ 创建模型配置的便捷函数
190
+
191
+ Args:
192
+ name: 模型名称
193
+ model_type: 模型类型
194
+ algorithm: 算法名称
195
+ schema_name: 使用的schema名称
196
+ table_types: 使用的表类型列表
197
+ bank_code: 银行代码
198
+ **kwargs: 其他配置参数
199
+
200
+ Returns:
201
+ ModelConfig实例
202
+ """
203
+ if table_types is None:
204
+ table_types = ["base"]
205
+
206
+ feature_config = FeatureConfig(
207
+ schema_name=schema_name,
208
+ table_types=table_types
209
+ )
210
+
211
+ return ModelConfig(
212
+ name=name,
213
+ model_type=ModelType(model_type),
214
+ algorithm=ModelAlgorithm(algorithm),
215
+ feature_config=feature_config,
216
+ bank_code=bank_code,
217
+ **kwargs
218
+ )
219
+
220
+
221
+ # 预定义的模型配置模板
222
+ PRESET_CONFIGS = {
223
+ "aum_longtail_classification": {
224
+ "model_type": "classification",
225
+ "algorithm": "random_forest",
226
+ "schema_name": "aum",
227
+ "table_types": ["behavior", "asset_avg", "asset_config", "monthly_stat"],
228
+ "hyperparameters": {
229
+ "n_estimators": 100,
230
+ "max_depth": 10,
231
+ "random_state": 42
232
+ },
233
+ "description": "AUM长尾客户分类模型"
234
+ },
235
+
236
+ "customer_value_regression": {
237
+ "model_type": "regression",
238
+ "algorithm": "gradient_boosting",
239
+ "schema_name": "aum",
240
+ "table_types": ["behavior", "asset_avg"],
241
+ "hyperparameters": {
242
+ "n_estimators": 150,
243
+ "learning_rate": 0.1,
244
+ "max_depth": 8
245
+ },
246
+ "description": "客户价值预测回归模型"
247
+ }
248
+ }
249
+
250
+
251
+ def create_preset_config(preset_name: str, **overrides) -> ModelConfig:
252
+ """
253
+ 基于预设模板创建模型配置
254
+
255
+ Args:
256
+ preset_name: 预设模板名称
257
+ **overrides: 覆盖的配置参数
258
+
259
+ Returns:
260
+ ModelConfig实例
261
+ """
262
+ if preset_name not in PRESET_CONFIGS:
263
+ raise ValueError(f"未知的预设配置: {preset_name}")
264
+
265
+ config = PRESET_CONFIGS[preset_name].copy()
266
+ config.update(overrides)
267
+
268
+ return create_model_config(
269
+ name=preset_name,
270
+ **config
271
+ )