staran 0.4.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {staran-0.4.1/staran.egg-info → staran-0.5.0}/PKG-INFO +6 -6
- {staran-0.4.1 → staran-0.5.0}/README.md +5 -5
- {staran-0.4.1 → staran-0.5.0}/setup.py +2 -1
- staran-0.5.0/staran/models/__init__.py +81 -0
- staran-0.5.0/staran/models/bank_configs.py +269 -0
- staran-0.5.0/staran/models/config.py +271 -0
- staran-0.5.0/staran/models/registry.py +281 -0
- staran-0.5.0/staran/models/target.py +321 -0
- staran-0.5.0/staran/schemas/aum/__init__.py +306 -0
- {staran-0.4.1 → staran-0.5.0/staran.egg-info}/PKG-INFO +6 -6
- {staran-0.4.1 → staran-0.5.0}/staran.egg-info/SOURCES.txt +5 -0
- staran-0.4.1/staran/schemas/aum/__init__.py +0 -314
- {staran-0.4.1 → staran-0.5.0}/LICENSE +0 -0
- {staran-0.4.1 → staran-0.5.0}/setup.cfg +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/__init__.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/engines/__init__.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/engines/base.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/engines/hive.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/engines/spark.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/engines/turing.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/examples/__init__.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/examples/aum_longtail.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/examples/aum_longtail_old.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/features/__init__.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/features/engines.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/features/generator.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/features/manager.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/features/schema.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/schemas/__init__.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/schemas/document_generator.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/tools/__init__.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran/tools/date.py +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran.egg-info/dependency_links.txt +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran.egg-info/requires.txt +0 -0
- {staran-0.4.1 → staran-0.5.0}/staran.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: staran
|
3
|
-
Version: 0.4.1
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: staran - 高性能Python工具库
|
5
5
|
Home-page: https://github.com/starlxa/staran
|
6
6
|
Author: StarAn
|
@@ -24,7 +24,7 @@ Dynamic: requires-dist
|
|
24
24
|
Dynamic: requires-python
|
25
25
|
Dynamic: summary
|
26
26
|
|
27
|
-
# Star## ✨ v0.4.
|
27
|
+
# Star## ✨ v0.4.2 新特性
|
28
28
|
|
29
29
|
- 📋 **独立Schema模块** - 专门的表结构定义和管理模块
|
30
30
|
- 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
|
@@ -42,7 +42,7 @@ Dynamic: summary
|
|
42
42
|
|
43
43
|
Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
|
44
44
|
|
45
|
-
## ✨ v0.4.
|
45
|
+
## ✨ v0.4.2 新特性
|
46
46
|
|
47
47
|
- 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
|
48
48
|
- 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
|
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
|
|
361
361
|
|
362
362
|
```
|
363
363
|
staran/
|
364
|
-
├── __init__.py # 主包入口,v0.4.
|
364
|
+
├── __init__.py # 主包入口,v0.4.2功能导出
|
365
365
|
├── schemas/ # 🆕 表结构定义与文档生成模块
|
366
366
|
│ ├── __init__.py # Schema模块入口
|
367
367
|
│ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
|
@@ -385,7 +385,7 @@ staran/
|
|
385
385
|
│ ├── __init__.py # 工具模块
|
386
386
|
│ └── date.py # Date类实现
|
387
387
|
├── setup.py # 安装配置
|
388
|
-
├── README.md # 本文档 v0.4.
|
388
|
+
├── README.md # 本文档 v0.4.2
|
389
389
|
└── quick-upload.sh # 快速部署脚本
|
390
390
|
```
|
391
391
|
|
@@ -561,4 +561,4 @@ MIT License
|
|
561
561
|
|
562
562
|
---
|
563
563
|
|
564
|
-
**Staran v0.4.
|
564
|
+
**Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Star## ✨ v0.4.
|
1
|
+
# Star## ✨ v0.4.2 新特性
|
2
2
|
|
3
3
|
- 📋 **独立Schema模块** - 专门的表结构定义和管理模块
|
4
4
|
- 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
|
@@ -16,7 +16,7 @@
|
|
16
16
|
|
17
17
|
Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
|
18
18
|
|
19
|
-
## ✨ v0.4.
|
19
|
+
## ✨ v0.4.2 新特性
|
20
20
|
|
21
21
|
- 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
|
22
22
|
- 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
|
@@ -335,7 +335,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
|
|
335
335
|
|
336
336
|
```
|
337
337
|
staran/
|
338
|
-
├── __init__.py # 主包入口,v0.4.
|
338
|
+
├── __init__.py # 主包入口,v0.4.2功能导出
|
339
339
|
├── schemas/ # 🆕 表结构定义与文档生成模块
|
340
340
|
│ ├── __init__.py # Schema模块入口
|
341
341
|
│ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
|
@@ -359,7 +359,7 @@ staran/
|
|
359
359
|
│ ├── __init__.py # 工具模块
|
360
360
|
│ └── date.py # Date类实现
|
361
361
|
├── setup.py # 安装配置
|
362
|
-
├── README.md # 本文档 v0.4.
|
362
|
+
├── README.md # 本文档 v0.4.2
|
363
363
|
└── quick-upload.sh # 快速部署脚本
|
364
364
|
```
|
365
365
|
|
@@ -535,4 +535,4 @@ MIT License
|
|
535
535
|
|
536
536
|
---
|
537
537
|
|
538
|
-
**Staran v0.4.
|
538
|
+
**Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name="staran",
|
5
|
-
version="0.
|
5
|
+
version="0.5.0",
|
6
6
|
description="staran - 高性能Python工具库",
|
7
7
|
long_description=open("README.md", encoding="utf-8").read(),
|
8
8
|
long_description_content_type="text/markdown",
|
@@ -17,6 +17,7 @@ setup(
|
|
17
17
|
"staran.examples",
|
18
18
|
"staran.schemas",
|
19
19
|
"staran.schemas.aum",
|
20
|
+
"staran.models",
|
20
21
|
],
|
21
22
|
install_requires=[
|
22
23
|
"datetime",
|
@@ -0,0 +1,81 @@
|
|
1
|
+
"""
|
2
|
+
Staran Models Module - v0.5.0
|
3
|
+
|
4
|
+
专业的机器学习模型配置和管理模块,提供:
|
5
|
+
- 模型配置管理 (ModelConfig)
|
6
|
+
- 目标变量定义 (TargetDefinition)
|
7
|
+
- 银行特定配置支持
|
8
|
+
- SQL驱动的target生成
|
9
|
+
- 模型部署和版本管理
|
10
|
+
|
11
|
+
支持的模型类型:
|
12
|
+
- 分类模型 (Classification)
|
13
|
+
- 回归模型 (Regression)
|
14
|
+
- 聚类模型 (Clustering)
|
15
|
+
- 时间序列模型 (TimeSeries)
|
16
|
+
|
17
|
+
支持的银行:
|
18
|
+
- 工商银行 (ICBC)
|
19
|
+
- 通用配置 (Generic)
|
20
|
+
"""
|
21
|
+
|
22
|
+
from .config import ModelConfig, ModelType, create_model_config
|
23
|
+
from .target import TargetDefinition, TargetType, create_target_definition
|
24
|
+
from .registry import ModelRegistry, register_model, get_model_config, save_model_registry
|
25
|
+
from .bank_configs import BankConfig, get_bank_config, register_bank_config
|
26
|
+
|
27
|
+
# Version information
__version__ = "0.5.0"

# Public API of the models package. ``from staran.models import *`` exposes
# exactly these names.
__all__ = [
    # Model configuration
    'ModelConfig',
    'ModelType',
    'create_model_config',

    # Target definition
    'TargetDefinition',
    'TargetType',
    'create_target_definition',

    # Model registry
    'ModelRegistry',
    'register_model',
    'get_model_config',
    'save_model_registry',

    # Bank configuration
    'BankConfig',
    'get_bank_config',
    'register_bank_config',

    # Convenience helpers defined further down in this module; they are
    # public (no leading underscore), so they are exported as well.
    'create_icbc_model',
    'list_available_models',
    'get_model_summary',
]
|
53
|
+
|
54
|
+
# 便捷函数
|
55
|
+
def create_icbc_model(model_name: str, model_type: str, target_sql: str, algorithm: str = "random_forest", **kwargs):
    """Create and register an ICBC model configuration with a SQL-based target.

    Args:
        model_name: Name of the model. The target definition is named
            ``f"{model_name}_target"``.
        model_type: Model type, passed through to ``create_model_config``.
        target_sql: SQL query used for the ``"sql_based"`` target definition.
        algorithm: Algorithm name, passed through to ``create_model_config``.
        **kwargs: Additional keyword arguments forwarded unchanged to
            ``create_model_config``.

    Returns:
        The value returned by ``register_model`` for the model/target pair.
    """
    # No bank-config lookup is done here: both factories below receive the
    # bank code directly, and an unused lookup result would only be noise.
    model_config = create_model_config(
        name=model_name,
        model_type=model_type,
        algorithm=algorithm,
        bank_code="icbc",
        **kwargs
    )

    target_config = create_target_definition(
        name=f"{model_name}_target",
        target_type="sql_based",
        sql_query=target_sql,
        bank_code="icbc"
    )

    return register_model(model_config, target_config)
|
74
|
+
|
75
|
+
def list_available_models():
    """Return whatever ``ModelRegistry.list_models()`` reports as available."""
    available = ModelRegistry.list_models()
    return available
|
78
|
+
|
79
|
+
def get_model_summary(model_name: str):
    """Return the summary ``ModelRegistry`` produces for *model_name*."""
    summary = ModelRegistry.get_model_summary(model_name)
    return summary
|
@@ -0,0 +1,269 @@
|
|
1
|
+
"""
|
2
|
+
银行特定配置模块
|
3
|
+
|
4
|
+
为不同银行提供定制化的配置和业务规则
|
5
|
+
"""
|
6
|
+
|
7
|
+
from enum import Enum
|
8
|
+
from typing import Dict, Any, List, Optional
|
9
|
+
from dataclasses import dataclass, field
|
10
|
+
|
11
|
+
|
12
|
+
class BankCode(Enum):
    """Enumeration of the bank codes known to this module."""

    # Industrial and Commercial Bank of China
    ICBC = "icbc"
    # China Construction Bank
    CCB = "ccb"
    # Bank of China
    BOC = "boc"
    # Agricultural Bank of China
    ABC = "abc"
    # China Merchants Bank
    CMB = "cmb"
    # Generic, bank-agnostic configuration
    GENERIC = "generic"
|
20
|
+
|
21
|
+
|
22
|
+
@dataclass
class BankConfig:
    """Per-bank configuration bundle.

    Holds identification fields plus several free-form dictionaries
    (database settings, table/field name mappings, business, compliance,
    data-processing, deployment and feature-engineering settings). Every
    dictionary defaults to a new empty dict per instance.
    """

    # --- Identification ---
    bank_code: str  # bank code
    bank_name: str  # bank display name
    region: str = "cn"  # region code

    # --- Database settings ---
    database_config: Dict[str, Any] = field(default_factory=dict)

    # --- Standard table name -> bank-specific table name ---
    table_mappings: Dict[str, str] = field(default_factory=dict)

    # --- Table -> {standard field name -> bank-specific field name} ---
    field_mappings: Dict[str, Dict[str, str]] = field(default_factory=dict)

    # --- Business rules ---
    business_rules: Dict[str, Any] = field(default_factory=dict)

    # --- Compliance requirements, keyed by operation name ---
    compliance_rules: Dict[str, Any] = field(default_factory=dict)

    # --- Data processing rules ---
    data_processing_rules: Dict[str, Any] = field(default_factory=dict)

    # --- Model deployment settings ---
    deployment_config: Dict[str, Any] = field(default_factory=dict)

    # --- Feature engineering settings ---
    feature_engineering_config: Dict[str, Any] = field(default_factory=dict)

    def get_table_name(self, standard_table: str) -> str:
        """Translate a standard table name to the bank-specific one.

        Falls back to *standard_table* itself when no mapping exists.
        """
        mapping = self.table_mappings
        return mapping.get(standard_table, standard_table)

    def get_field_name(self, table: str, standard_field: str) -> str:
        """Translate a standard field name for *table* to the bank-specific one.

        Falls back to *standard_field* itself when either the table or the
        field has no mapping.
        """
        return self.field_mappings.get(table, {}).get(standard_field, standard_field)

    def get_business_rule(self, rule_name: str, default=None):
        """Return the business rule stored under *rule_name*, or *default*."""
        rules = self.business_rules
        return rules.get(rule_name, default)

    def validate_compliance(self, operation: str) -> bool:
        """Report whether *operation* is allowed by the compliance rules.

        Returns the ``'enabled'`` entry of the rule set stored for
        *operation*; operations without a rule set, and rule sets without an
        ``'enabled'`` entry, yield ``True``.
        """
        rule_set = self.compliance_rules.get(operation, {})
        # Placeholder: concrete compliance checks can be implemented here.
        return rule_set.get('enabled', True)
|
72
|
+
|
73
|
+
|
74
|
+
# Registry of bank configurations, keyed by bank code
_BANK_CONFIGS: Dict[str, BankConfig] = {}


def register_bank_config(config: BankConfig):
    """Store *config* in the module-level registry under its ``bank_code``.

    An entry already registered under the same code is overwritten. A
    confirmation message is printed once the entry has been stored.
    """
    registry_key = config.bank_code
    _BANK_CONFIGS[registry_key] = config
    print(f"✅ 银行配置 {config.bank_code} ({config.bank_name}) 注册成功")
|
82
|
+
|
83
|
+
|
84
|
+
def get_bank_config(bank_code: str) -> Optional[BankConfig]:
    """Look up a registered bank configuration.

    Returns the ``BankConfig`` stored under *bank_code*, or ``None`` when
    nothing has been registered for that code.
    """
    found = _BANK_CONFIGS.get(bank_code, None)
    return found
|
87
|
+
|
88
|
+
|
89
|
+
def list_bank_configs() -> List[Dict[str, str]]:
    """Summarise every registered bank configuration.

    Returns one dict per registered configuration, in registry order, with
    the keys ``'bank_code'``, ``'bank_name'`` and ``'region'``.
    """
    summaries = []
    for entry in _BANK_CONFIGS.values():
        summary = {
            'bank_code': entry.bank_code,
            'bank_name': entry.bank_name,
            'region': entry.region,
        }
        summaries.append(summary)
    return summaries
|
99
|
+
|
100
|
+
|
101
|
+
# 预定义银行配置
|
102
|
+
def create_icbc_config() -> BankConfig:
    """Build a new ``BankConfig`` for ICBC (Industrial and Commercial Bank of China).

    All dictionaries are created afresh on every call, so the returned
    object shares no mutable state with earlier results.
    """
    database = {
        "default_database": "dwegdata03000",
        "connection_pool_size": 10,
        "query_timeout": 300,
    }

    tables = {
        "behavior_table": "bi_hlwj_dfcw_f1_f4_wy",
        "asset_avg_table": "bi_hlwj_zi_chan_avg_wy",
        "asset_config_table": "bi_hlwj_zi_chang_month_total_zb",
        "monthly_stat_table": "bi_hlwj_realy_month_stat_wy",
    }

    fields = {
        "behavior_table": {
            "customer_id": "party_id",
            "date_field": "data_dt",
        },
    }

    business = {
        "data_retention_days": 90,
        "min_sample_size": 1000,
        "max_features": 500,
        "risk_threshold": 0.8,
        "aum_threshold": 100000,
        "longtail_definition": {
            "asset_threshold": 50000,
            "activity_threshold": 0.3,
        },
    }

    compliance = {
        "data_export": {
            "enabled": True,
            "approval_required": True,
            "encryption_required": True,
        },
        "model_deployment": {
            "enabled": True,
            "testing_required": True,
            "documentation_required": True,
        },
        "feature_selection": {
            "enabled": True,
            "sensitive_data_allowed": False,
            "audit_trail_required": True,
        },
    }

    processing = {
        "missing_value_strategy": "median",
        "outlier_detection": True,
        "outlier_threshold": 3.0,
        "feature_scaling": "standard",
        "categorical_encoding": "one_hot",
    }

    deployment = {
        "platform": "turing",
        "environment": "production",
        "monitoring_enabled": True,
        "auto_scaling": True,
        "backup_required": True,
    }

    feature_engineering = {
        "time_windows": ["1_month", "3_months", "6_months", "1_year"],
        "aggregation_functions": ["sum", "avg", "max", "min", "std"],
        "interaction_features": True,
        "polynomial_features": False,
        "target_encoding": True,
    }

    return BankConfig(
        bank_code="icbc",
        bank_name="中国工商银行",
        region="cn",
        database_config=database,
        table_mappings=tables,
        field_mappings=fields,
        business_rules=business,
        compliance_rules=compliance,
        data_processing_rules=processing,
        deployment_config=deployment,
        feature_engineering_config=feature_engineering,
    )
|
183
|
+
|
184
|
+
|
185
|
+
def create_generic_config() -> BankConfig:
    """Build a new generic (bank-agnostic) ``BankConfig``.

    Field mappings, deployment settings and feature-engineering settings
    are not supplied and therefore keep their empty-dict defaults.
    """
    database = {
        "default_database": "default_db",
        "connection_pool_size": 5,
        "query_timeout": 180,
    }

    tables = {
        "behavior_table": "customer_behavior",
        "asset_avg_table": "customer_assets",
        "asset_config_table": "asset_config",
        "monthly_stat_table": "monthly_stats",
    }

    business = {
        "data_retention_days": 30,
        "min_sample_size": 100,
        "max_features": 100,
    }

    compliance = {
        "data_export": {"enabled": True},
        "model_deployment": {"enabled": True},
    }

    processing = {
        "missing_value_strategy": "mean",
        "outlier_detection": False,
        "feature_scaling": "none",
    }

    return BankConfig(
        bank_code="generic",
        bank_name="通用银行配置",
        region="generic",
        database_config=database,
        table_mappings=tables,
        business_rules=business,
        compliance_rules=compliance,
        data_processing_rules=processing,
    )
|
222
|
+
|
223
|
+
|
224
|
+
# 初始化默认银行配置
|
225
|
+
def initialize_default_configs():
    """Register the built-in bank configurations: ICBC first, then generic."""
    for build_config in (create_icbc_config, create_generic_config):
        register_bank_config(build_config())


# Run automatically when the module is imported
initialize_default_configs()
|
236
|
+
|
237
|
+
|
238
|
+
# 新疆工行特定配置
|
239
|
+
def create_xinjiang_icbc_config() -> BankConfig:
    """Build the Xinjiang ICBC configuration.

    Starts from a new ICBC configuration, replaces its code, name and
    region, and then adds Xinjiang-specific entries to the business rules
    and data-processing rules.
    """
    xinjiang = create_icbc_config()

    # Re-label the base ICBC configuration
    xinjiang.bank_code = "xinjiang_icbc"
    xinjiang.bank_name = "新疆工商银行"
    xinjiang.region = "xinjiang"

    # Xinjiang-specific business rules
    xinjiang.business_rules.update(
        regional_compliance=True,
        minority_customer_support=True,
        language_support=["zh", "ug"],  # Chinese and Uyghur
        timezone="Asia/Urumqi",
        currency_support=["CNY"],
        cross_border_transaction=True,
    )

    # Xinjiang-specific data-processing rules
    xinjiang.data_processing_rules.update(
        character_encoding="utf-8",
        regional_holidays=True,
        time_zone_conversion=True,
    )

    return xinjiang


# Register the Xinjiang ICBC configuration at import time
register_bank_config(create_xinjiang_icbc_config())
|