staran 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {staran-0.5.0/staran.egg-info → staran-0.6.0}/PKG-INFO +6 -6
- {staran-0.5.0 → staran-0.6.0}/README.md +5 -5
- {staran-0.5.0 → staran-0.6.0}/setup.py +3 -2
- staran-0.6.0/staran/__init__.py +0 -0
- staran-0.6.0/staran/banks/__init__.py +30 -0
- staran-0.6.0/staran/banks/xinjiang_icbc/__init__.py +90 -0
- staran-0.6.0/staran/models/__init__.py +72 -0
- staran-0.6.0/staran/models/daifa_models.py +361 -0
- staran-0.6.0/staran/schemas/__init__.py +27 -0
- staran-0.6.0/staran/schemas/aum/__init__.py +210 -0
- {staran-0.5.0 → staran-0.6.0/staran.egg-info}/PKG-INFO +6 -6
- {staran-0.5.0 → staran-0.6.0}/staran.egg-info/SOURCES.txt +5 -6
- staran-0.5.0/staran/__init__.py +0 -261
- staran-0.5.0/staran/examples/__init__.py +0 -8
- staran-0.5.0/staran/examples/aum_longtail.py +0 -250
- staran-0.5.0/staran/examples/aum_longtail_old.py +0 -487
- staran-0.5.0/staran/models/__init__.py +0 -81
- staran-0.5.0/staran/models/bank_configs.py +0 -269
- staran-0.5.0/staran/schemas/__init__.py +0 -28
- staran-0.5.0/staran/schemas/aum/__init__.py +0 -306
- {staran-0.5.0 → staran-0.6.0}/LICENSE +0 -0
- {staran-0.5.0 → staran-0.6.0}/setup.cfg +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/engines/__init__.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/engines/base.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/engines/hive.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/engines/spark.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/engines/turing.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/features/__init__.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/features/engines.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/features/generator.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/features/manager.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/features/schema.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/models/config.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/models/registry.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/models/target.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/tools/__init__.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran/tools/date.py +0 -0
- {staran-0.5.0/staran/schemas → staran-0.6.0/staran/tools}/document_generator.py +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran.egg-info/dependency_links.txt +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran.egg-info/requires.txt +0 -0
- {staran-0.5.0 → staran-0.6.0}/staran.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: staran
|
3
|
-
Version: 0.5.0
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: staran - 高性能Python工具库
|
5
5
|
Home-page: https://github.com/starlxa/staran
|
6
6
|
Author: StarAn
|
@@ -24,7 +24,7 @@ Dynamic: requires-dist
|
|
24
24
|
Dynamic: requires-python
|
25
25
|
Dynamic: summary
|
26
26
|
|
27
|
-
# Star## ✨ v0.5.0 新特性
|
27
|
+
# Star## ✨ v0.6.0 新特性
|
28
28
|
|
29
29
|
- 📋 **独立Schema模块** - 专门的表结构定义和管理模块
|
30
30
|
- 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
|
@@ -42,7 +42,7 @@ Dynamic: summary
|
|
42
42
|
|
43
43
|
Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
|
44
44
|
|
45
|
-
## ✨ v0.5.0 新特性
|
45
|
+
## ✨ v0.6.0 新特性
|
46
46
|
|
47
47
|
- 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
|
48
48
|
- 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
|
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
|
|
361
361
|
|
362
362
|
```
|
363
363
|
staran/
|
364
|
-
├── __init__.py # 主包入口,v0.5.0功能导出
|
364
|
+
├── __init__.py # 主包入口,v0.6.0功能导出
|
365
365
|
├── schemas/ # 🆕 表结构定义与文档生成模块
|
366
366
|
│ ├── __init__.py # Schema模块入口
|
367
367
|
│ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
|
@@ -385,7 +385,7 @@ staran/
|
|
385
385
|
│ ├── __init__.py # 工具模块
|
386
386
|
│ └── date.py # Date类实现
|
387
387
|
├── setup.py # 安装配置
|
388
|
-
├── README.md # 本文档 v0.5.0
|
388
|
+
├── README.md # 本文档 v0.6.0
|
389
389
|
└── quick-upload.sh # 快速部署脚本
|
390
390
|
```
|
391
391
|
|
@@ -561,4 +561,4 @@ MIT License
|
|
561
561
|
|
562
562
|
---
|
563
563
|
|
564
|
-
**Staran v0.5.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
564
|
+
**Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Star## ✨ v0.5.0 新特性
|
1
|
+
# Star## ✨ v0.6.0 新特性
|
2
2
|
|
3
3
|
- 📋 **独立Schema模块** - 专门的表结构定义和管理模块
|
4
4
|
- 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
|
@@ -16,7 +16,7 @@
|
|
16
16
|
|
17
17
|
Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
|
18
18
|
|
19
|
-
## ✨ v0.5.0 新特性
|
19
|
+
## ✨ v0.6.0 新特性
|
20
20
|
|
21
21
|
- 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
|
22
22
|
- 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
|
@@ -335,7 +335,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
|
|
335
335
|
|
336
336
|
```
|
337
337
|
staran/
|
338
|
-
├── __init__.py # 主包入口,v0.5.0功能导出
|
338
|
+
├── __init__.py # 主包入口,v0.6.0功能导出
|
339
339
|
├── schemas/ # 🆕 表结构定义与文档生成模块
|
340
340
|
│ ├── __init__.py # Schema模块入口
|
341
341
|
│ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
|
@@ -359,7 +359,7 @@ staran/
|
|
359
359
|
│ ├── __init__.py # 工具模块
|
360
360
|
│ └── date.py # Date类实现
|
361
361
|
├── setup.py # 安装配置
|
362
|
-
├── README.md # 本文档 v0.5.0
|
362
|
+
├── README.md # 本文档 v0.6.0
|
363
363
|
└── quick-upload.sh # 快速部署脚本
|
364
364
|
```
|
365
365
|
|
@@ -535,4 +535,4 @@ MIT License
|
|
535
535
|
|
536
536
|
---
|
537
537
|
|
538
|
-
**Staran v0.5.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
538
|
+
**Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name="staran",
|
5
|
-
version="0.5.0",
|
5
|
+
version="0.6.0",
|
6
6
|
description="staran - 高性能Python工具库",
|
7
7
|
long_description=open("README.md", encoding="utf-8").read(),
|
8
8
|
long_description_content_type="text/markdown",
|
@@ -14,10 +14,11 @@ setup(
|
|
14
14
|
"staran.tools",
|
15
15
|
"staran.features",
|
16
16
|
"staran.engines",
|
17
|
-
"staran.examples",
|
18
17
|
"staran.schemas",
|
19
18
|
"staran.schemas.aum",
|
20
19
|
"staran.models",
|
20
|
+
"staran.banks",
|
21
|
+
"staran.banks.xinjiang_icbc",
|
21
22
|
],
|
22
23
|
install_requires=[
|
23
24
|
"datetime",
|
File without changes
|
@@ -0,0 +1,30 @@
|
|
1
|
+
"""
|
2
|
+
staran.banks - 银行配置模块
|
3
|
+
|
4
|
+
该模块包含不同银行的特定配置,包括:
|
5
|
+
- 数据库连接配置
|
6
|
+
- 表结构定义
|
7
|
+
- 业务规则设置
|
8
|
+
- 模型配置
|
9
|
+
|
10
|
+
支持的银行:
|
11
|
+
- xinjiang_icbc: 新疆工行配置
|
12
|
+
|
13
|
+
版本: 0.6.0
|
14
|
+
"""
|
15
|
+
|
16
|
+
from .xinjiang_icbc import (
|
17
|
+
XinjiangICBCConfig,
|
18
|
+
get_xinjiang_icbc_tables,
|
19
|
+
get_xinjiang_icbc_models,
|
20
|
+
xinjiang_icbc_config
|
21
|
+
)
|
22
|
+
|
23
|
+
__all__ = [
|
24
|
+
'XinjiangICBCConfig',
|
25
|
+
'xinjiang_icbc_config',
|
26
|
+
'get_xinjiang_icbc_tables',
|
27
|
+
'get_xinjiang_icbc_models'
|
28
|
+
]
|
29
|
+
|
30
|
+
__version__ = "0.6.0"
|
@@ -0,0 +1,90 @@
|
|
1
|
+
"""
|
2
|
+
新疆工行银行配置模块
|
3
|
+
|
4
|
+
专门针对新疆工行代发长尾客户的配置:
|
5
|
+
- 数据库表结构定义(代发长尾客户专用)
|
6
|
+
- 业务规则配置
|
7
|
+
- 模型配置(提升模型和防流失模型)
|
8
|
+
|
9
|
+
数据库: xinjiang_icbc_daifa_longtail
|
10
|
+
业务范围: 代发长尾客户
|
11
|
+
"""
|
12
|
+
|
13
|
+
from dataclasses import dataclass
|
14
|
+
from typing import Dict, List, Optional
|
15
|
+
from datetime import datetime
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
class XinjiangICBCConfig:
    """Settings for the Xinjiang ICBC payroll ("daifa") long-tail customer segment.

    Groups the database identifiers, business labels, the names of the
    available models and the numeric thresholds used by the business rules.
    """

    # Database settings
    database_name: str = "xinjiang_icbc_daifa_longtail"
    schema_name: str = "daifa_longtail"

    # Business settings
    business_domain: str = "代发长尾客户"
    customer_segment: str = "代发长尾"

    # Model names. ``None`` is a sentinel meaning "use the two built-in
    # models"; __post_init__ replaces it with a fresh list per instance.
    # The annotation is Optional because None is the declared default.
    available_models: Optional[List[str]] = None

    # Business rules
    longtail_asset_min: float = 10000  # minimum asset of a long-tail customer
    longtail_asset_max: float = 100000  # maximum asset of a long-tail customer
    upgrade_target: float = 3000  # target amount for the upgrade model
    churn_threshold: float = 1500  # threshold amount for the churn model

    def __post_init__(self):
        """Fill in the built-in model names when none were supplied."""
        if self.available_models is None:
            # Built inside the method so instances never share one list.
            self.available_models = [
                "daifa_longtail_upgrade_3k",  # long-tail 3k upgrade model
                "daifa_longtail_churn_1_5k",  # long-tail 1.5k churn-prevention model
            ]
|
45
|
+
|
46
|
+
|
47
|
+
def get_xinjiang_icbc_tables() -> Dict[str, str]:
    """Return the logical-name to physical-table-name mapping.

    A new dict is built on every call, so callers may mutate the result.
    """
    table_pairs = (
        # behaviour table
        ("daifa_longtail_behavior", "xinjiang_icbc_daifa_hlwj_dfcw_f1_f4_wy"),
        # average-asset table
        ("daifa_longtail_asset_avg", "xinjiang_icbc_daifa_hlwj_zi_chan_avg_wy"),
        # asset-allocation table
        ("daifa_longtail_asset_config", "xinjiang_icbc_daifa_hlwj_zi_chan_config_wy"),
        # monthly statistics table
        ("daifa_longtail_monthly_stat", "xinjiang_icbc_daifa_hlwj_monthly_stat_wy"),
    )
    return dict(table_pairs)
|
62
|
+
|
63
|
+
|
64
|
+
def get_xinjiang_icbc_models() -> Dict[str, Dict]:
    """Return the descriptive catalogue of the two long-tail models.

    The result maps each model identifier to a dict with the keys ``name``,
    ``description``, ``target``, ``model_type``, ``business_objective``,
    ``target_threshold`` and ``prediction_window``. A new structure is built
    on every call.
    """
    upgrade_spec = {
        "name": "代发长尾客户提升3k预测模型",
        "description": "预测下个月代发长尾客户资产提升3000元的概率",
        "target": "upgrade_3k_next_month",
        "model_type": "binary_classification",
        "business_objective": "识别有潜力提升资产的代发长尾客户",
        "target_threshold": 3000,
        "prediction_window": "1_month",
    }
    churn_spec = {
        "name": "代发长尾客户防流失1.5k预测模型",
        "description": "预测下个月代发长尾客户流失1500元资产的风险",
        "target": "churn_1_5k_next_month",
        "model_type": "binary_classification",
        "business_objective": "识别有流失风险的代发长尾客户",
        "target_threshold": 1500,
        "prediction_window": "1_month",
    }

    catalogue: Dict[str, Dict] = {}
    catalogue["daifa_longtail_upgrade_3k"] = upgrade_spec
    catalogue["daifa_longtail_churn_1_5k"] = churn_spec
    return catalogue
|
87
|
+
|
88
|
+
|
89
|
+
# 创建默认配置实例
|
90
|
+
xinjiang_icbc_config = XinjiangICBCConfig()
|
@@ -0,0 +1,72 @@
|
|
1
|
+
"""
|
2
|
+
staran.models v0.6.0 - 新疆工行代发长尾客户模型管理
|
3
|
+
|
4
|
+
专门针对新疆工行代发长尾客户的两个核心模型:
|
5
|
+
1. 代发长尾客户提升3k预测模型 (daifa_longtail_upgrade_3k)
|
6
|
+
2. 代发长尾客户防流失1.5k预测模型 (daifa_longtail_churn_1_5k)
|
7
|
+
|
8
|
+
主要功能:
|
9
|
+
- 模型配置管理
|
10
|
+
- SQL驱动的目标变量定义
|
11
|
+
- 模型注册和版本控制
|
12
|
+
- 新疆工行特定配置
|
13
|
+
"""
|
14
|
+
|
15
|
+
from .config import ModelConfig, create_model_config
|
16
|
+
from .target import TargetDefinition, create_target_definition
|
17
|
+
from .registry import ModelRegistry, register_model, save_model_registry
|
18
|
+
from .daifa_models import (
|
19
|
+
create_daifa_longtail_upgrade_model,
|
20
|
+
create_daifa_longtail_churn_model,
|
21
|
+
get_available_daifa_models,
|
22
|
+
create_both_daifa_models
|
23
|
+
)
|
24
|
+
|
25
|
+
# 便捷函数
|
26
|
+
def create_xinjiang_icbc_models(output_dir: str = "./xinjiang_models") -> dict:
    """Convenience alias: forward ``output_dir`` to ``create_both_daifa_models``.

    Returns whatever ``create_both_daifa_models`` returns.
    """
    created = create_both_daifa_models(output_dir)
    return created
|
29
|
+
|
30
|
+
def list_available_models() -> list:
    """Convenience alias: return the result of ``get_available_daifa_models``."""
    model_names = get_available_daifa_models()
    return model_names
|
33
|
+
|
34
|
+
def get_model_summary() -> dict:
    """Return a static overview of the package's two long-tail models.

    The dict has the keys ``version``, ``bank``, ``business_domain`` and
    ``models``; each entry of ``models`` carries ``name``, ``description``,
    ``target_amount`` and ``model_type``.
    """
    shared_type = "binary_classification"
    rows = (
        ("daifa_longtail_upgrade_3k", "预测下个月代发长尾客户资产提升3k的概率", 3000),
        ("daifa_longtail_churn_1_5k", "预测下个月代发长尾客户流失1.5k资产的风险", 1500),
    )
    model_entries = [
        {
            "name": model_name,
            "description": summary,
            "target_amount": amount,
            "model_type": shared_type,
        }
        for model_name, summary, amount in rows
    ]

    overview = {
        "version": "0.6.0",
        "bank": "新疆工行",
        "business_domain": "代发长尾客户",
    }
    overview["models"] = model_entries
    return overview
|
55
|
+
|
56
|
+
__all__ = [
|
57
|
+
# 核心组件
|
58
|
+
'ModelConfig', 'TargetDefinition', 'ModelRegistry',
|
59
|
+
|
60
|
+
# 创建函数
|
61
|
+
'create_model_config', 'create_target_definition', 'register_model',
|
62
|
+
|
63
|
+
# 代发长尾模型
|
64
|
+
'create_daifa_longtail_upgrade_model', 'create_daifa_longtail_churn_model',
|
65
|
+
'create_both_daifa_models', 'get_available_daifa_models',
|
66
|
+
|
67
|
+
# 便捷函数
|
68
|
+
'create_xinjiang_icbc_models', 'list_available_models', 'get_model_summary',
|
69
|
+
'save_model_registry'
|
70
|
+
]
|
71
|
+
|
72
|
+
__version__ = "0.6.0"
|
@@ -0,0 +1,361 @@
|
|
1
|
+
"""
|
2
|
+
新疆工行代发长尾客户专用模型定义
|
3
|
+
|
4
|
+
包含两个核心模型:
|
5
|
+
1. 代发长尾客户提升3k预测模型
|
6
|
+
2. 代发长尾客户防流失1.5k预测模型
|
7
|
+
|
8
|
+
基于新疆工行代发长尾客户数据库和业务规则
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import Dict, List
|
12
|
+
from .config import create_model_config
|
13
|
+
from .target import create_target_definition
|
14
|
+
from .registry import ModelRegistry, register_model
|
15
|
+
import os
|
16
|
+
import json
|
17
|
+
from datetime import datetime
|
18
|
+
|
19
|
+
|
20
|
+
def save_model_registry(output_path: str, registry=None):
    """Write every registered model to ``output_path`` as UTF-8 JSON.

    Args:
        output_path: Destination file. It is opened in write mode, so an
            existing file is overwritten.
        registry: Object exposing ``_models`` (mapping of model id to an
            entry with ``model_config``, ``target_definition``,
            ``registered_at``, ``status`` and ``performance_metrics``) and
            ``_version_history``. Defaults to ``ModelRegistry``.

    Returns:
        ``output_path``, unchanged.

    The JSON payload has three top-level keys: ``models``,
    ``version_history`` and ``saved_at``.
    """
    from enum import Enum  # local import keeps this function self-contained

    def convert_to_serializable(obj):
        """Recursively turn ``obj`` into JSON-friendly builtins."""
        if isinstance(obj, datetime):
            return obj.isoformat()
        # Enum members carry a __dict__ (whose __objclass__ points back at
        # the enum class), so they must be handled before the generic
        # __dict__ branch; otherwise they are never reduced to their value.
        if isinstance(obj, Enum):
            return convert_to_serializable(obj.value)
        # Containers come before the __dict__ branch so that dict/list
        # subclasses are serialized by content rather than by attributes.
        if isinstance(obj, dict):
            return {k: convert_to_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [convert_to_serializable(item) for item in obj]
        if hasattr(obj, '__dict__'):
            return {
                key: convert_to_serializable(value)
                for key, value in obj.__dict__.items()
            }
        if hasattr(obj, 'value'):  # enum-like object without a __dict__
            return obj.value
        return obj

    source = ModelRegistry if registry is None else registry

    # Every field goes through the converter so an enum status or datetime
    # values inside the metrics cannot make json.dump fail.
    models = {
        model_id: {
            "model_config": convert_to_serializable(entry.model_config),
            "target_definition": convert_to_serializable(entry.target_definition),
            "registered_at": entry.registered_at.isoformat(),
            "status": convert_to_serializable(entry.status),
            "performance_metrics": convert_to_serializable(entry.performance_metrics),
        }
        for model_id, entry in source._models.items()
    }

    data = {
        "models": models,
        "version_history": convert_to_serializable(source._version_history),
        "saved_at": str(datetime.now()),
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"✅ 模型注册信息已保存到: {output_path}")
    return output_path
|
64
|
+
|
65
|
+
|
66
|
+
def create_daifa_longtail_upgrade_model() -> Dict:
    """Build the config and target definition of the 3k asset-upgrade model.

    Returns:
        A dict with the keys ``model_config`` (result of
        ``create_model_config``), ``target_definition`` (result of
        ``create_target_definition``) and ``model_type`` (the literal
        ``"upgrade_prediction"``).

    The label SQL is a plain string: ``{baseline_date}`` and
    ``{next_month_date}`` stay in it as literal placeholders and are not
    substituted here.
    """
    # Model configuration
    source_tables = [
        "daifa_longtail_behavior",
        "daifa_longtail_asset_avg",
        "daifa_longtail_asset_config",
        "daifa_longtail_monthly_stat",
    ]
    boosting_params = {
        "n_estimators": 300,
        "learning_rate": 0.05,
        "max_depth": 12,
        "min_samples_split": 20,
        "min_samples_leaf": 10,
        "subsample": 0.8,
        "random_state": 42,
    }
    model_config = create_model_config(
        name="xinjiang_icbc_daifa_longtail_upgrade_3k",
        model_type="classification",
        algorithm="gradient_boosting",
        version="1.0.0",
        schema_name="daifa_longtail",
        table_types=source_tables,
        hyperparameters=boosting_params,
        bank_code="xinjiang_icbc",
        business_domain="代发长尾客户",
        description="新疆工行代发长尾客户下个月资产提升3k预测模型",
        tags=["daifa", "longtail", "upgrade", "3k", "xinjiang_icbc"],
    )

    # Target definition: a 3k asset increase in the following month
    label_sql = """
    WITH customer_baseline AS (
        -- 获取代发长尾客户基础信息(当月)
        SELECT
            b.party_id,
            b.asset_total_bal as current_asset,
            b.salary_amount as current_salary,
            b.longtail_score,
            b.upgrade_potential,
            CASE
                WHEN b.asset_total_bal BETWEEN 10000 AND 100000 THEN 1
                ELSE 0
            END as is_daifa_longtail
        FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy b
        WHERE b.data_dt = '{baseline_date}'
    ),

    next_month_performance AS (
        -- 计算下个月的资产变化
        SELECT
            party_id,
            asset_total_bal as next_month_asset,
            salary_amount as next_month_salary,
            monthly_deposit_amount,
            monthly_withdraw_amount
        FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy
        WHERE data_dt = '{next_month_date}'
    ),

    asset_change AS (
        -- 计算资产变化情况
        SELECT
            cb.party_id,
            cb.current_asset,
            nmp.next_month_asset,
            (nmp.next_month_asset - cb.current_asset) as asset_change,
            nmp.monthly_deposit_amount,
            cb.upgrade_potential
        FROM customer_baseline cb
        INNER JOIN next_month_performance nmp ON cb.party_id = nmp.party_id
        WHERE cb.is_daifa_longtail = 1 -- 只关注代发长尾客户
    )

    SELECT
        party_id,
        CASE
            -- 代发长尾客户资产提升3k的判断标准
            WHEN asset_change >= 3000 -- 资产增长达到3000元
                AND monthly_deposit_amount > asset_change * 0.7 -- 主要通过存入实现
                AND upgrade_potential >= 0.6 -- 提升潜力评分较高
            THEN 1
            ELSE 0
        END as upgrade_3k_target,

        -- 辅助分析字段
        current_asset,
        next_month_asset,
        asset_change,
        monthly_deposit_amount,
        upgrade_potential

    FROM asset_change
    """
    upgrade_rules = {
        "min_asset_threshold": 10000,  # lower asset bound of the segment
        "max_asset_threshold": 100000,  # upper asset bound of the segment
        "upgrade_target_amount": 3000,  # asset increase counted as an upgrade
        "deposit_contribution_ratio": 0.7,  # share of the increase from deposits
        "min_upgrade_potential": 0.6,  # minimum upgrade-potential score
    }
    target_definition = create_target_definition(
        name="daifa_longtail_upgrade_3k_target",
        target_type="binary_classification",
        description="新疆工行代发长尾客户下个月资产提升3000元预测目标",
        sql_query=label_sql,
        target_column="upgrade_3k_target",
        class_labels=["no_upgrade", "upgrade_3k"],
        # the positive (upgrade) class is weighted more heavily
        class_weights={"no_upgrade": 1.0, "upgrade_3k": 2.5},
        time_window="1_month",
        prediction_horizon="1_month",
        bank_code="xinjiang_icbc",
        business_rules=upgrade_rules,
    )

    bundle = {
        "model_config": model_config,
        "target_definition": target_definition,
        "model_type": "upgrade_prediction",
    }
    return bundle
|
181
|
+
|
182
|
+
|
183
|
+
def create_daifa_longtail_churn_model() -> Dict:
    """Build the config and target definition of the 1.5k churn model.

    Returns:
        A dict with the keys ``model_config`` (result of
        ``create_model_config``), ``target_definition`` (result of
        ``create_target_definition``) and ``model_type`` (the literal
        ``"churn_prevention"``).

    The label SQL is a plain string: ``{baseline_date}`` and
    ``{next_month_date}`` stay in it as literal placeholders and are not
    substituted here.
    """
    # Model configuration (the churn model uses a random forest)
    source_tables = [
        "daifa_longtail_behavior",
        "daifa_longtail_asset_avg",
        "daifa_longtail_asset_config",
        "daifa_longtail_monthly_stat",
    ]
    forest_params = {
        "n_estimators": 200,
        "max_depth": 10,
        "min_samples_split": 15,
        "min_samples_leaf": 8,
        "max_features": "sqrt",
        "random_state": 42,
        "class_weight": "balanced",  # compensates for class imbalance
    }
    model_config = create_model_config(
        name="xinjiang_icbc_daifa_longtail_churn_1_5k",
        model_type="classification",
        algorithm="random_forest",
        version="1.0.0",
        schema_name="daifa_longtail",
        table_types=source_tables,
        hyperparameters=forest_params,
        bank_code="xinjiang_icbc",
        business_domain="代发长尾客户",
        description="新疆工行代发长尾客户下个月流失1.5k资产风险预测模型",
        tags=["daifa", "longtail", "churn", "1_5k", "risk_prevention"],
    )

    # Target definition: a 1.5k asset loss in the following month
    label_sql = """
    WITH customer_baseline AS (
        -- 获取代发长尾客户基础信息(当月)
        SELECT
            b.party_id,
            b.asset_total_bal as current_asset,
            b.salary_amount as current_salary,
            b.longtail_score,
            b.churn_risk,
            b.login_days,
            CASE
                WHEN b.asset_total_bal BETWEEN 10000 AND 100000 THEN 1
                ELSE 0
            END as is_daifa_longtail
        FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy b
        WHERE b.data_dt = '{baseline_date}'
    ),

    next_month_performance AS (
        -- 计算下个月的资产变化和行为
        SELECT
            party_id,
            asset_total_bal as next_month_asset,
            monthly_withdraw_amount,
            login_days as next_month_login_days
        FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy
        WHERE data_dt = '{next_month_date}'
    ),

    churn_analysis AS (
        -- 分析流失风险情况
        SELECT
            cb.party_id,
            cb.current_asset,
            nmp.next_month_asset,
            (cb.current_asset - nmp.next_month_asset) as asset_decrease,
            nmp.monthly_withdraw_amount,
            cb.churn_risk,
            cb.login_days,
            nmp.next_month_login_days
        FROM customer_baseline cb
        INNER JOIN next_month_performance nmp ON cb.party_id = nmp.party_id
        WHERE cb.is_daifa_longtail = 1 -- 只关注代发长尾客户
    )

    SELECT
        party_id,
        CASE
            -- 代发长尾客户流失1.5k的判断标准
            WHEN asset_decrease >= 1500 -- 资产减少达到1500元
                AND monthly_withdraw_amount >= 1500 -- 主要通过取出导致
                AND (
                    churn_risk >= 0.7 -- 流失风险评分高
                    OR next_month_login_days <= login_days * 0.5 -- 活跃度大幅下降
                )
            THEN 1
            ELSE 0
        END as churn_1_5k_target,

        -- 辅助分析字段
        current_asset,
        next_month_asset,
        asset_decrease,
        monthly_withdraw_amount,
        churn_risk,
        login_days,
        next_month_login_days

    FROM churn_analysis
    """
    churn_rules = {
        "min_asset_threshold": 10000,  # lower asset bound of the segment
        "max_asset_threshold": 100000,  # upper asset bound of the segment
        "churn_threshold_amount": 1500,  # asset decrease counted as churn
        "min_churn_risk": 0.7,  # minimum churn-risk score
        "activity_decline_ratio": 0.5,  # login-day ratio treated as a sharp drop
    }
    target_definition = create_target_definition(
        name="daifa_longtail_churn_1_5k_target",
        target_type="binary_classification",
        description="新疆工行代发长尾客户下个月流失1500元资产风险预测目标",
        sql_query=label_sql,
        target_column="churn_1_5k_target",
        class_labels=["no_churn", "churn_1_5k"],
        # the positive (churn) class is weighted more heavily
        class_weights={"no_churn": 1.0, "churn_1_5k": 3.0},
        time_window="1_month",
        prediction_horizon="1_month",
        bank_code="xinjiang_icbc",
        business_rules=churn_rules,
    )

    bundle = {
        "model_config": model_config,
        "target_definition": target_definition,
        "model_type": "churn_prevention",
    }
    return bundle
|
305
|
+
|
306
|
+
|
307
|
+
def create_both_daifa_models(output_dir: str = "./xinjiang_models") -> Dict:
    """Create both long-tail models, register them and save the registry.

    Args:
        output_dir: Directory for ``model_registry.json``; created if it
            does not exist.

    Returns:
        A dict with ``upgrade_model`` and ``churn_model`` (each holding
        ``model_id``, ``config`` and ``target``), plus ``registry_path`` and
        ``output_dir``.
    """
    def _build_and_register(factory):
        # Build one model bundle and register it; return (id, bundle).
        bundle = factory()
        model_id = register_model(
            bundle["model_config"],
            bundle["target_definition"],
        )
        return model_id, bundle

    def _describe(model_id, bundle):
        # Shape one model's slice of the returned summary.
        return {
            "model_id": model_id,
            "config": bundle["model_config"],
            "target": bundle["target_definition"],
        }

    # The output directory must exist before the registry file is written.
    os.makedirs(output_dir, exist_ok=True)

    # Upgrade model first, then churn model.
    upgrade_id, upgrade_model = _build_and_register(create_daifa_longtail_upgrade_model)
    churn_id, churn_model = _build_and_register(create_daifa_longtail_churn_model)

    # Save the registration info into the requested directory.
    registry_path = os.path.join(output_dir, "model_registry.json")
    save_model_registry(registry_path)

    return {
        "upgrade_model": _describe(upgrade_id, upgrade_model),
        "churn_model": _describe(churn_id, churn_model),
        "registry_path": registry_path,
        "output_dir": output_dir,
    }
|
345
|
+
|
346
|
+
|
347
|
+
def get_available_daifa_models() -> List[str]:
    """Return the identifiers of the two long-tail models as a new list."""
    upgrade_name = "daifa_longtail_upgrade_3k"  # 3k upgrade model
    churn_name = "daifa_longtail_churn_1_5k"  # 1.5k churn-prevention model
    return [upgrade_name, churn_name]
|
353
|
+
|
354
|
+
|
355
|
+
# 导出函数
|
356
|
+
__all__ = [
|
357
|
+
'create_daifa_longtail_upgrade_model',
|
358
|
+
'create_daifa_longtail_churn_model',
|
359
|
+
'create_both_daifa_models',
|
360
|
+
'get_available_daifa_models'
|
361
|
+
]
|