staran 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {staran-0.5.0/staran.egg-info → staran-0.6.0}/PKG-INFO +6 -6
  2. {staran-0.5.0 → staran-0.6.0}/README.md +5 -5
  3. {staran-0.5.0 → staran-0.6.0}/setup.py +3 -2
  4. staran-0.6.0/staran/__init__.py +0 -0
  5. staran-0.6.0/staran/banks/__init__.py +30 -0
  6. staran-0.6.0/staran/banks/xinjiang_icbc/__init__.py +90 -0
  7. staran-0.6.0/staran/models/__init__.py +72 -0
  8. staran-0.6.0/staran/models/daifa_models.py +361 -0
  9. staran-0.6.0/staran/schemas/__init__.py +27 -0
  10. staran-0.6.0/staran/schemas/aum/__init__.py +210 -0
  11. {staran-0.5.0 → staran-0.6.0/staran.egg-info}/PKG-INFO +6 -6
  12. {staran-0.5.0 → staran-0.6.0}/staran.egg-info/SOURCES.txt +5 -6
  13. staran-0.5.0/staran/__init__.py +0 -261
  14. staran-0.5.0/staran/examples/__init__.py +0 -8
  15. staran-0.5.0/staran/examples/aum_longtail.py +0 -250
  16. staran-0.5.0/staran/examples/aum_longtail_old.py +0 -487
  17. staran-0.5.0/staran/models/__init__.py +0 -81
  18. staran-0.5.0/staran/models/bank_configs.py +0 -269
  19. staran-0.5.0/staran/schemas/__init__.py +0 -28
  20. staran-0.5.0/staran/schemas/aum/__init__.py +0 -306
  21. {staran-0.5.0 → staran-0.6.0}/LICENSE +0 -0
  22. {staran-0.5.0 → staran-0.6.0}/setup.cfg +0 -0
  23. {staran-0.5.0 → staran-0.6.0}/staran/engines/__init__.py +0 -0
  24. {staran-0.5.0 → staran-0.6.0}/staran/engines/base.py +0 -0
  25. {staran-0.5.0 → staran-0.6.0}/staran/engines/hive.py +0 -0
  26. {staran-0.5.0 → staran-0.6.0}/staran/engines/spark.py +0 -0
  27. {staran-0.5.0 → staran-0.6.0}/staran/engines/turing.py +0 -0
  28. {staran-0.5.0 → staran-0.6.0}/staran/features/__init__.py +0 -0
  29. {staran-0.5.0 → staran-0.6.0}/staran/features/engines.py +0 -0
  30. {staran-0.5.0 → staran-0.6.0}/staran/features/generator.py +0 -0
  31. {staran-0.5.0 → staran-0.6.0}/staran/features/manager.py +0 -0
  32. {staran-0.5.0 → staran-0.6.0}/staran/features/schema.py +0 -0
  33. {staran-0.5.0 → staran-0.6.0}/staran/models/config.py +0 -0
  34. {staran-0.5.0 → staran-0.6.0}/staran/models/registry.py +0 -0
  35. {staran-0.5.0 → staran-0.6.0}/staran/models/target.py +0 -0
  36. {staran-0.5.0 → staran-0.6.0}/staran/tools/__init__.py +0 -0
  37. {staran-0.5.0 → staran-0.6.0}/staran/tools/date.py +0 -0
  38. {staran-0.5.0/staran/schemas → staran-0.6.0/staran/tools}/document_generator.py +0 -0
  39. {staran-0.5.0 → staran-0.6.0}/staran.egg-info/dependency_links.txt +0 -0
  40. {staran-0.5.0 → staran-0.6.0}/staran.egg-info/requires.txt +0 -0
  41. {staran-0.5.0 → staran-0.6.0}/staran.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: staran
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: staran - 高性能Python工具库
5
5
  Home-page: https://github.com/starlxa/staran
6
6
  Author: StarAn
@@ -24,7 +24,7 @@ Dynamic: requires-dist
24
24
  Dynamic: requires-python
25
25
  Dynamic: summary
26
26
 
27
- # Star## ✨ v0.4.2 新特性
27
+ # Staran ✨ v0.6.0 新特性
28
28
 
29
29
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
30
30
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -42,7 +42,7 @@ Dynamic: summary
42
42
 
43
43
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
44
44
 
45
- ## ✨ v0.4.2 新特性
45
+ ## ✨ v0.6.0 新特性
46
46
 
47
47
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
48
48
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
361
361
 
362
362
  ```
363
363
  staran/
364
- ├── __init__.py # 主包入口,v0.4.2功能导出
364
+ ├── __init__.py # 主包入口,v0.6.0功能导出
365
365
  ├── schemas/ # 🆕 表结构定义与文档生成模块
366
366
  │ ├── __init__.py # Schema模块入口
367
367
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -385,7 +385,7 @@ staran/
385
385
  │ ├── __init__.py # 工具模块
386
386
  │ └── date.py # Date类实现
387
387
  ├── setup.py # 安装配置
388
- ├── README.md # 本文档 v0.4.2
388
+ ├── README.md # 本文档 v0.6.0
389
389
  └── quick-upload.sh # 快速部署脚本
390
390
  ```
391
391
 
@@ -561,4 +561,4 @@ MIT License
561
561
 
562
562
  ---
563
563
 
564
- **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
564
+ **Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -1,4 +1,4 @@
1
- # Star## ✨ v0.4.2 新特性
1
+ # Staran ✨ v0.6.0 新特性
2
2
 
3
3
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
4
4
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -16,7 +16,7 @@
16
16
 
17
17
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
18
18
 
19
- ## ✨ v0.4.2 新特性
19
+ ## ✨ v0.6.0 新特性
20
20
 
21
21
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
22
22
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -335,7 +335,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
335
335
 
336
336
  ```
337
337
  staran/
338
- ├── __init__.py # 主包入口,v0.4.2功能导出
338
+ ├── __init__.py # 主包入口,v0.6.0功能导出
339
339
  ├── schemas/ # 🆕 表结构定义与文档生成模块
340
340
  │ ├── __init__.py # Schema模块入口
341
341
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -359,7 +359,7 @@ staran/
359
359
  │ ├── __init__.py # 工具模块
360
360
  │ └── date.py # Date类实现
361
361
  ├── setup.py # 安装配置
362
- ├── README.md # 本文档 v0.4.2
362
+ ├── README.md # 本文档 v0.6.0
363
363
  └── quick-upload.sh # 快速部署脚本
364
364
  ```
365
365
 
@@ -535,4 +535,4 @@ MIT License
535
535
 
536
536
  ---
537
537
 
538
- **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
538
+ **Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="staran",
5
- version="0.5.0",
5
+ version="0.6.0",
6
6
  description="staran - 高性能Python工具库",
7
7
  long_description=open("README.md", encoding="utf-8").read(),
8
8
  long_description_content_type="text/markdown",
@@ -14,10 +14,11 @@ setup(
14
14
  "staran.tools",
15
15
  "staran.features",
16
16
  "staran.engines",
17
- "staran.examples",
18
17
  "staran.schemas",
19
18
  "staran.schemas.aum",
20
19
  "staran.models",
20
+ "staran.banks",
21
+ "staran.banks.xinjiang_icbc",
21
22
  ],
22
23
  install_requires=[
23
24
  "datetime",
File without changes
@@ -0,0 +1,30 @@
1
+ """
2
+ staran.banks - 银行配置模块
3
+
4
+ 该模块包含不同银行的特定配置,包括:
5
+ - 数据库连接配置
6
+ - 表结构定义
7
+ - 业务规则设置
8
+ - 模型配置
9
+
10
+ 支持的银行:
11
+ - xinjiang_icbc: 新疆工行配置
12
+
13
+ 版本: 0.6.0
14
+ """
15
+
16
+ from .xinjiang_icbc import (
17
+ XinjiangICBCConfig,
18
+ get_xinjiang_icbc_tables,
19
+ get_xinjiang_icbc_models,
20
+ xinjiang_icbc_config
21
+ )
22
+
23
+ __all__ = [
24
+ 'XinjiangICBCConfig',
25
+ 'xinjiang_icbc_config',
26
+ 'get_xinjiang_icbc_tables',
27
+ 'get_xinjiang_icbc_models'
28
+ ]
29
+
30
+ __version__ = "0.6.0"
@@ -0,0 +1,90 @@
1
+ """
2
+ 新疆工行银行配置模块
3
+
4
+ 专门针对新疆工行代发长尾客户的配置:
5
+ - 数据库表结构定义(代发长尾客户专用)
6
+ - 业务规则配置
7
+ - 模型配置(提升模型和防流失模型)
8
+
9
+ 数据库: xinjiang_icbc_daifa_longtail
10
+ 业务范围: 代发长尾客户
11
+ """
12
+
13
+ from dataclasses import dataclass
14
+ from typing import Dict, List, Optional
15
+ from datetime import datetime
16
+
17
+
18
@dataclass
class XinjiangICBCConfig:
    """Configuration for the Xinjiang ICBC payroll ("daifa") long-tail customer business.

    Every field has a default, so ``XinjiangICBCConfig()`` yields the stock
    configuration.  ``available_models`` defaults to ``None`` and is replaced
    in ``__post_init__`` by a newly built list of the two built-in model
    identifiers, so instances never share one mutable default list.
    """

    # Database settings
    database_name: str = "xinjiang_icbc_daifa_longtail"
    schema_name: str = "daifa_longtail"

    # Business settings
    business_domain: str = "代发长尾客户"
    customer_segment: str = "代发长尾"

    # Model settings.  ``None`` means "use the built-in model list"; the
    # annotation is Optional because ``None`` is the declared default.
    available_models: Optional[List[str]] = None

    # Business rules
    longtail_asset_min: float = 10000   # minimum asset of a long-tail customer
    longtail_asset_max: float = 100000  # maximum asset of a long-tail customer
    upgrade_target: float = 3000        # asset-increase target amount
    churn_threshold: float = 1500       # asset-loss threshold amount

    def __post_init__(self):
        """Fill in the built-in model identifiers when none were supplied."""
        if self.available_models is None:
            self.available_models = [
                "daifa_longtail_upgrade_3k",  # long-tail "upgrade 3k" model
                "daifa_longtail_churn_1_5k"   # long-tail "churn 1.5k" model
            ]
45
+
46
+
47
def get_xinjiang_icbc_tables() -> Dict[str, str]:
    """Return the table mapping for Xinjiang ICBC payroll long-tail customers.

    Keys are the logical table names used by the model definitions; values
    are the corresponding physical table names.  A new dict is built on
    every call.
    """
    table_pairs = (
        # Customer behavior table
        ("daifa_longtail_behavior", "xinjiang_icbc_daifa_hlwj_dfcw_f1_f4_wy"),
        # Average-asset table
        ("daifa_longtail_asset_avg", "xinjiang_icbc_daifa_hlwj_zi_chan_avg_wy"),
        # Asset-allocation table
        ("daifa_longtail_asset_config", "xinjiang_icbc_daifa_hlwj_zi_chan_config_wy"),
        # Monthly statistics table
        ("daifa_longtail_monthly_stat", "xinjiang_icbc_daifa_hlwj_monthly_stat_wy"),
    )
    return dict(table_pairs)
62
+
63
+
64
def get_xinjiang_icbc_models() -> Dict[str, Dict]:
    """Return the model catalog for Xinjiang ICBC payroll long-tail customers.

    The result maps each model identifier to a descriptor dict with the keys
    ``name``, ``description``, ``target``, ``model_type``,
    ``business_objective``, ``target_threshold`` and ``prediction_window``.
    """

    def describe(name, description, target, objective, threshold):
        # Both models share the same model type and prediction window.
        return {
            "name": name,
            "description": description,
            "target": target,
            "model_type": "binary_classification",
            "business_objective": objective,
            "target_threshold": threshold,
            "prediction_window": "1_month",
        }

    catalog = {}
    catalog["daifa_longtail_upgrade_3k"] = describe(
        "代发长尾客户提升3k预测模型",
        "预测下个月代发长尾客户资产提升3000元的概率",
        "upgrade_3k_next_month",
        "识别有潜力提升资产的代发长尾客户",
        3000,
    )
    catalog["daifa_longtail_churn_1_5k"] = describe(
        "代发长尾客户防流失1.5k预测模型",
        "预测下个月代发长尾客户流失1500元资产的风险",
        "churn_1_5k_next_month",
        "识别有流失风险的代发长尾客户",
        1500,
    )
    return catalog
87
+
88
+
89
# Module-level default configuration instance, built with every field default.
xinjiang_icbc_config = XinjiangICBCConfig()
@@ -0,0 +1,72 @@
1
+ """
2
+ staran.models v0.6.0 - 新疆工行代发长尾客户模型管理
3
+
4
+ 专门针对新疆工行代发长尾客户的两个核心模型:
5
+ 1. 代发长尾客户提升3k预测模型 (daifa_longtail_upgrade_3k)
6
+ 2. 代发长尾客户防流失1.5k预测模型 (daifa_longtail_churn_1_5k)
7
+
8
+ 主要功能:
9
+ - 模型配置管理
10
+ - SQL驱动的目标变量定义
11
+ - 模型注册和版本控制
12
+ - 新疆工行特定配置
13
+ """
14
+
15
+ from .config import ModelConfig, create_model_config
16
+ from .target import TargetDefinition, create_target_definition
17
+ from .registry import ModelRegistry, register_model, save_model_registry
18
+ from .daifa_models import (
19
+ create_daifa_longtail_upgrade_model,
20
+ create_daifa_longtail_churn_model,
21
+ get_available_daifa_models,
22
+ create_both_daifa_models
23
+ )
24
+
25
+ # 便捷函数
26
def create_xinjiang_icbc_models(output_dir: str = "./xinjiang_models") -> dict:
    """Create both Xinjiang ICBC payroll long-tail models.

    Thin convenience wrapper: forwards ``output_dir`` to
    ``create_both_daifa_models`` and returns its result unchanged.
    """
    created_models = create_both_daifa_models(output_dir)
    return created_models
29
+
30
def list_available_models() -> list:
    """List the available payroll long-tail models.

    Thin convenience wrapper around ``get_available_daifa_models``; its
    result is returned unchanged.
    """
    model_names = get_available_daifa_models()
    return model_names
33
+
34
def get_model_summary() -> dict:
    """Return a static overview of the package's models.

    The result carries the keys ``version``, ``bank``, ``business_domain``
    and ``models``; ``models`` is a list with one descriptor dict per model
    (``name``, ``description``, ``target_amount``, ``model_type``).
    """
    # (name, description, target amount) for each model, in listing order.
    model_specs = (
        ("daifa_longtail_upgrade_3k", "预测下个月代发长尾客户资产提升3k的概率", 3000),
        ("daifa_longtail_churn_1_5k", "预测下个月代发长尾客户流失1.5k资产的风险", 1500),
    )

    model_entries = [
        {
            "name": model_name,
            "description": model_description,
            "target_amount": amount,
            "model_type": "binary_classification",
        }
        for model_name, model_description, amount in model_specs
    ]

    summary = {"version": "0.6.0"}
    summary["bank"] = "新疆工行"
    summary["business_domain"] = "代发长尾客户"
    summary["models"] = model_entries
    return summary
55
+
56
+ __all__ = [
57
+ # 核心组件
58
+ 'ModelConfig', 'TargetDefinition', 'ModelRegistry',
59
+
60
+ # 创建函数
61
+ 'create_model_config', 'create_target_definition', 'register_model',
62
+
63
+ # 代发长尾模型
64
+ 'create_daifa_longtail_upgrade_model', 'create_daifa_longtail_churn_model',
65
+ 'create_both_daifa_models', 'get_available_daifa_models',
66
+
67
+ # 便捷函数
68
+ 'create_xinjiang_icbc_models', 'list_available_models', 'get_model_summary',
69
+ 'save_model_registry'
70
+ ]
71
+
72
+ __version__ = "0.6.0"
@@ -0,0 +1,361 @@
1
+ """
2
+ 新疆工行代发长尾客户专用模型定义
3
+
4
+ 包含两个核心模型:
5
+ 1. 代发长尾客户提升3k预测模型
6
+ 2. 代发长尾客户防流失1.5k预测模型
7
+
8
+ 基于新疆工行代发长尾客户数据库和业务规则
9
+ """
10
+
11
+ from typing import Dict, List
12
+ from .config import create_model_config
13
+ from .target import create_target_definition
14
+ from .registry import ModelRegistry, register_model
15
+ import os
16
+ import json
17
+ from datetime import datetime
18
+
19
+
20
def save_model_registry(output_path: str):
    """Write every model registered in ``ModelRegistry`` to a JSON file.

    The file holds three top-level keys: ``models`` (one entry per
    registered model id), ``version_history`` and ``saved_at``.

    Args:
        output_path: Path of the JSON file to create or overwrite.

    Returns:
        ``output_path``, unchanged.
    """
    # Function-scope import keeps this block self-contained.
    from enum import Enum

    def convert_to_serializable(obj):
        """Recursively convert *obj* into JSON-serializable builtins."""
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, Enum):
            # Must come before the generic ``__dict__`` branch: enum members
            # expose a ``__dict__``, so that branch would otherwise capture
            # them and the enum would never be reduced to its value.
            return convert_to_serializable(obj.value)
        if isinstance(obj, dict):
            # Containers are checked before ``__dict__`` as well, so that
            # dict/list subclasses carrying an instance ``__dict__`` are
            # serialized by their contents rather than by their attributes.
            return {k: convert_to_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [convert_to_serializable(item) for item in obj]
        if hasattr(obj, '__dict__'):
            return {
                key: convert_to_serializable(value)
                for key, value in obj.__dict__.items()
            }
        if hasattr(obj, 'value'):
            # Enum-like objects that are not ``enum.Enum`` instances.
            return obj.value
        return obj

    data = {
        "models": {},
        "version_history": {},
        "saved_at": str(datetime.now())
    }

    # Collect every registered model.  ``status`` and ``performance_metrics``
    # go through the converter too, so enum or datetime values inside them do
    # not make ``json.dump`` fail.
    for model_id, entry in ModelRegistry._models.items():
        data["models"][model_id] = {
            "model_config": convert_to_serializable(entry.model_config),
            "target_definition": convert_to_serializable(entry.target_definition),
            "registered_at": entry.registered_at.isoformat(),
            "status": convert_to_serializable(entry.status),
            "performance_metrics": convert_to_serializable(entry.performance_metrics)
        }

    data["version_history"] = convert_to_serializable(
        ModelRegistry._version_history.copy()
    )

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"✅ 模型注册信息已保存到: {output_path}")
    return output_path
64
+
65
+
66
def create_daifa_longtail_upgrade_model() -> Dict:
    """Build the "upgrade 3k" prediction model definition for payroll long-tail customers.

    Creates a model configuration via ``create_model_config`` and a target
    definition via ``create_target_definition``.  Nothing is registered or
    written to disk by this function.

    Returns:
        Dict with the keys ``"model_config"``, ``"target_definition"`` and
        ``"model_type"`` (the fixed string ``"upgrade_prediction"``).
    """

    # Model configuration
    model_config = create_model_config(
        name="xinjiang_icbc_daifa_longtail_upgrade_3k",
        model_type="classification",
        algorithm="gradient_boosting",
        version="1.0.0",
        schema_name="daifa_longtail",
        table_types=["daifa_longtail_behavior", "daifa_longtail_asset_avg",
                     "daifa_longtail_asset_config", "daifa_longtail_monthly_stat"],
        hyperparameters={
            "n_estimators": 300,
            "learning_rate": 0.05,
            "max_depth": 12,
            "min_samples_split": 20,
            "min_samples_leaf": 10,
            "subsample": 0.8,
            "random_state": 42
        },
        bank_code="xinjiang_icbc",
        business_domain="代发长尾客户",
        description="新疆工行代发长尾客户下个月资产提升3k预测模型",
        tags=["daifa", "longtail", "upgrade", "3k", "xinjiang_icbc"]
    )

    # Target definition - predict a 3k asset increase next month.
    # NOTE(review): the SQL keeps the literal placeholders {baseline_date} and
    # {next_month_date}; this function does not substitute them — presumably
    # the consumer of the target definition fills them in. TODO confirm.
    target_definition = create_target_definition(
        name="daifa_longtail_upgrade_3k_target",
        target_type="binary_classification",
        description="新疆工行代发长尾客户下个月资产提升3000元预测目标",
        sql_query="""
        WITH customer_baseline AS (
            -- 获取代发长尾客户基础信息(当月)
            SELECT
                b.party_id,
                b.asset_total_bal as current_asset,
                b.salary_amount as current_salary,
                b.longtail_score,
                b.upgrade_potential,
                CASE
                    WHEN b.asset_total_bal BETWEEN 10000 AND 100000 THEN 1
                    ELSE 0
                END as is_daifa_longtail
            FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy b
            WHERE b.data_dt = '{baseline_date}'
        ),

        next_month_performance AS (
            -- 计算下个月的资产变化
            SELECT
                party_id,
                asset_total_bal as next_month_asset,
                salary_amount as next_month_salary,
                monthly_deposit_amount,
                monthly_withdraw_amount
            FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy
            WHERE data_dt = '{next_month_date}'
        ),

        asset_change AS (
            -- 计算资产变化情况
            SELECT
                cb.party_id,
                cb.current_asset,
                nmp.next_month_asset,
                (nmp.next_month_asset - cb.current_asset) as asset_change,
                nmp.monthly_deposit_amount,
                cb.upgrade_potential
            FROM customer_baseline cb
            INNER JOIN next_month_performance nmp ON cb.party_id = nmp.party_id
            WHERE cb.is_daifa_longtail = 1  -- 只关注代发长尾客户
        )

        SELECT
            party_id,
            CASE
                -- 代发长尾客户资产提升3k的判断标准
                WHEN asset_change >= 3000  -- 资产增长达到3000元
                     AND monthly_deposit_amount > asset_change * 0.7  -- 主要通过存入实现
                     AND upgrade_potential >= 0.6  -- 提升潜力评分较高
                THEN 1
                ELSE 0
            END as upgrade_3k_target,

            -- 辅助分析字段
            current_asset,
            next_month_asset,
            asset_change,
            monthly_deposit_amount,
            upgrade_potential

        FROM asset_change
        """,
        target_column="upgrade_3k_target",
        class_labels=["no_upgrade", "upgrade_3k"],
        class_weights={"no_upgrade": 1.0, "upgrade_3k": 2.5},  # the upgrade class is weighted higher
        time_window="1_month",
        prediction_horizon="1_month",
        bank_code="xinjiang_icbc",
        # These values repeat the literals hard-coded in the SQL above
        # (10000/100000, 3000, 0.7, 0.6); keep the two in sync when editing.
        business_rules={
            "min_asset_threshold": 10000,       # long-tail minimum asset
            "max_asset_threshold": 100000,      # long-tail maximum asset
            "upgrade_target_amount": 3000,      # asset-increase target amount
            "deposit_contribution_ratio": 0.7,  # share of the increase that must come from deposits
            "min_upgrade_potential": 0.6        # minimum upgrade-potential score
        }
    )

    return {
        "model_config": model_config,
        "target_definition": target_definition,
        "model_type": "upgrade_prediction"
    }
181
+
182
+
183
def create_daifa_longtail_churn_model() -> Dict:
    """Build the "churn 1.5k" prevention model definition for payroll long-tail customers.

    Creates a model configuration via ``create_model_config`` and a target
    definition via ``create_target_definition``.  Nothing is registered or
    written to disk by this function.

    Returns:
        Dict with the keys ``"model_config"``, ``"target_definition"`` and
        ``"model_type"`` (the fixed string ``"churn_prevention"``).
    """

    # Model configuration
    model_config = create_model_config(
        name="xinjiang_icbc_daifa_longtail_churn_1_5k",
        model_type="classification",
        algorithm="random_forest",  # the churn model uses a random forest
        version="1.0.0",
        schema_name="daifa_longtail",
        table_types=["daifa_longtail_behavior", "daifa_longtail_asset_avg",
                     "daifa_longtail_asset_config", "daifa_longtail_monthly_stat"],
        hyperparameters={
            "n_estimators": 200,
            "max_depth": 10,
            "min_samples_split": 15,
            "min_samples_leaf": 8,
            "max_features": "sqrt",
            "random_state": 42,
            "class_weight": "balanced"  # to handle imbalanced data
        },
        bank_code="xinjiang_icbc",
        business_domain="代发长尾客户",
        description="新疆工行代发长尾客户下个月流失1.5k资产风险预测模型",
        tags=["daifa", "longtail", "churn", "1_5k", "risk_prevention"]
    )

    # Target definition - predict the risk of a 1.5k asset loss next month.
    # NOTE(review): the SQL keeps the literal placeholders {baseline_date} and
    # {next_month_date}; this function does not substitute them — presumably
    # the consumer of the target definition fills them in. TODO confirm.
    target_definition = create_target_definition(
        name="daifa_longtail_churn_1_5k_target",
        target_type="binary_classification",
        description="新疆工行代发长尾客户下个月流失1500元资产风险预测目标",
        sql_query="""
        WITH customer_baseline AS (
            -- 获取代发长尾客户基础信息(当月)
            SELECT
                b.party_id,
                b.asset_total_bal as current_asset,
                b.salary_amount as current_salary,
                b.longtail_score,
                b.churn_risk,
                b.login_days,
                CASE
                    WHEN b.asset_total_bal BETWEEN 10000 AND 100000 THEN 1
                    ELSE 0
                END as is_daifa_longtail
            FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy b
            WHERE b.data_dt = '{baseline_date}'
        ),

        next_month_performance AS (
            -- 计算下个月的资产变化和行为
            SELECT
                party_id,
                asset_total_bal as next_month_asset,
                monthly_withdraw_amount,
                login_days as next_month_login_days
            FROM xinjiang_icbc_daifa_hlwj_monthly_stat_wy
            WHERE data_dt = '{next_month_date}'
        ),

        churn_analysis AS (
            -- 分析流失风险情况
            SELECT
                cb.party_id,
                cb.current_asset,
                nmp.next_month_asset,
                (cb.current_asset - nmp.next_month_asset) as asset_decrease,
                nmp.monthly_withdraw_amount,
                cb.churn_risk,
                cb.login_days,
                nmp.next_month_login_days
            FROM customer_baseline cb
            INNER JOIN next_month_performance nmp ON cb.party_id = nmp.party_id
            WHERE cb.is_daifa_longtail = 1  -- 只关注代发长尾客户
        )

        SELECT
            party_id,
            CASE
                -- 代发长尾客户流失1.5k的判断标准
                WHEN asset_decrease >= 1500  -- 资产减少达到1500元
                     AND monthly_withdraw_amount >= 1500  -- 主要通过取出导致
                     AND (
                         churn_risk >= 0.7  -- 流失风险评分高
                         OR next_month_login_days <= login_days * 0.5  -- 活跃度大幅下降
                     )
                THEN 1
                ELSE 0
            END as churn_1_5k_target,

            -- 辅助分析字段
            current_asset,
            next_month_asset,
            asset_decrease,
            monthly_withdraw_amount,
            churn_risk,
            login_days,
            next_month_login_days

        FROM churn_analysis
        """,
        target_column="churn_1_5k_target",
        class_labels=["no_churn", "churn_1_5k"],
        class_weights={"no_churn": 1.0, "churn_1_5k": 3.0},  # the churn class is weighted higher
        time_window="1_month",
        prediction_horizon="1_month",
        bank_code="xinjiang_icbc",
        # These values repeat the literals hard-coded in the SQL above
        # (10000/100000, 1500, 0.7, 0.5); keep the two in sync when editing.
        business_rules={
            "min_asset_threshold": 10000,    # long-tail minimum asset
            "max_asset_threshold": 100000,   # long-tail maximum asset
            "churn_threshold_amount": 1500,  # asset-loss threshold amount
            "min_churn_risk": 0.7,           # minimum churn-risk score
            "activity_decline_ratio": 0.5    # activity (login days) decline ratio
        }
    )

    return {
        "model_config": model_config,
        "target_definition": target_definition,
        "model_type": "churn_prevention"
    }
305
+
306
+
307
def create_both_daifa_models(output_dir: str = "./xinjiang_models") -> Dict:
    """Create and register both payroll long-tail models, then save the registry.

    The upgrade model is built and registered first, the churn model second;
    afterwards the registry is saved to ``model_registry.json`` inside
    ``output_dir`` (the directory is created when missing).

    Returns:
        Dict with the keys ``"upgrade_model"`` and ``"churn_model"`` (each
        holding ``model_id``, ``config`` and ``target``), plus
        ``"registry_path"`` and ``"output_dir"``.
    """
    # The output directory must exist before the registry file is written.
    os.makedirs(output_dir, exist_ok=True)

    def build_and_register(factory):
        # Build one model definition, register it, and shape the result entry.
        bundle = factory()
        assigned_id = register_model(
            bundle["model_config"],
            bundle["target_definition"],
        )
        return {
            "model_id": assigned_id,
            "config": bundle["model_config"],
            "target": bundle["target_definition"],
        }

    upgrade_entry = build_and_register(create_daifa_longtail_upgrade_model)
    churn_entry = build_and_register(create_daifa_longtail_churn_model)

    # Persist the registry into the requested directory.
    registry_path = os.path.join(output_dir, "model_registry.json")
    save_model_registry(registry_path)

    result = {"upgrade_model": upgrade_entry, "churn_model": churn_entry}
    result["registry_path"] = registry_path
    result["output_dir"] = output_dir
    return result
345
+
346
+
347
def get_available_daifa_models() -> List[str]:
    """Return the identifiers of the available payroll long-tail models.

    A new list is built on every call.
    """
    model_ids = (
        "daifa_longtail_upgrade_3k",  # "upgrade 3k" model
        "daifa_longtail_churn_1_5k",  # "churn 1.5k" prevention model
    )
    return list(model_ids)
353
+
354
+
355
+ # 导出函数
356
+ __all__ = [
357
+ 'create_daifa_longtail_upgrade_model',
358
+ 'create_daifa_longtail_churn_model',
359
+ 'create_both_daifa_models',
360
+ 'get_available_daifa_models'
361
+ ]