staran 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {staran-0.4.1/staran.egg-info → staran-0.4.2}/PKG-INFO +6 -6
  2. {staran-0.4.1 → staran-0.4.2}/README.md +5 -5
  3. {staran-0.4.1 → staran-0.4.2}/setup.py +1 -1
  4. staran-0.4.2/staran/schemas/aum/__init__.py +306 -0
  5. {staran-0.4.1 → staran-0.4.2/staran.egg-info}/PKG-INFO +6 -6
  6. staran-0.4.1/staran/schemas/aum/__init__.py +0 -314
  7. {staran-0.4.1 → staran-0.4.2}/LICENSE +0 -0
  8. {staran-0.4.1 → staran-0.4.2}/setup.cfg +0 -0
  9. {staran-0.4.1 → staran-0.4.2}/staran/__init__.py +0 -0
  10. {staran-0.4.1 → staran-0.4.2}/staran/engines/__init__.py +0 -0
  11. {staran-0.4.1 → staran-0.4.2}/staran/engines/base.py +0 -0
  12. {staran-0.4.1 → staran-0.4.2}/staran/engines/hive.py +0 -0
  13. {staran-0.4.1 → staran-0.4.2}/staran/engines/spark.py +0 -0
  14. {staran-0.4.1 → staran-0.4.2}/staran/engines/turing.py +0 -0
  15. {staran-0.4.1 → staran-0.4.2}/staran/examples/__init__.py +0 -0
  16. {staran-0.4.1 → staran-0.4.2}/staran/examples/aum_longtail.py +0 -0
  17. {staran-0.4.1 → staran-0.4.2}/staran/examples/aum_longtail_old.py +0 -0
  18. {staran-0.4.1 → staran-0.4.2}/staran/features/__init__.py +0 -0
  19. {staran-0.4.1 → staran-0.4.2}/staran/features/engines.py +0 -0
  20. {staran-0.4.1 → staran-0.4.2}/staran/features/generator.py +0 -0
  21. {staran-0.4.1 → staran-0.4.2}/staran/features/manager.py +0 -0
  22. {staran-0.4.1 → staran-0.4.2}/staran/features/schema.py +0 -0
  23. {staran-0.4.1 → staran-0.4.2}/staran/schemas/__init__.py +0 -0
  24. {staran-0.4.1 → staran-0.4.2}/staran/schemas/document_generator.py +0 -0
  25. {staran-0.4.1 → staran-0.4.2}/staran/tools/__init__.py +0 -0
  26. {staran-0.4.1 → staran-0.4.2}/staran/tools/date.py +0 -0
  27. {staran-0.4.1 → staran-0.4.2}/staran.egg-info/SOURCES.txt +0 -0
  28. {staran-0.4.1 → staran-0.4.2}/staran.egg-info/dependency_links.txt +0 -0
  29. {staran-0.4.1 → staran-0.4.2}/staran.egg-info/requires.txt +0 -0
  30. {staran-0.4.1 → staran-0.4.2}/staran.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: staran
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: staran - 高性能Python工具库
5
5
  Home-page: https://github.com/starlxa/staran
6
6
  Author: StarAn
@@ -24,7 +24,7 @@ Dynamic: requires-dist
24
24
  Dynamic: requires-python
25
25
  Dynamic: summary
26
26
 
27
- # Star## ✨ v0.4.1 新特性
27
+ # Star## ✨ v0.4.2 新特性
28
28
 
29
29
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
30
30
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -42,7 +42,7 @@ Dynamic: summary
42
42
 
43
43
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
44
44
 
45
- ## ✨ v0.4.1 新特性
45
+ ## ✨ v0.4.2 新特性
46
46
 
47
47
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
48
48
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
361
361
 
362
362
  ```
363
363
  staran/
364
- ├── __init__.py # 主包入口,v0.4.1功能导出
364
+ ├── __init__.py # 主包入口,v0.4.2功能导出
365
365
  ├── schemas/ # 🆕 表结构定义与文档生成模块
366
366
  │ ├── __init__.py # Schema模块入口
367
367
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -385,7 +385,7 @@ staran/
385
385
  │ ├── __init__.py # 工具模块
386
386
  │ └── date.py # Date类实现
387
387
  ├── setup.py # 安装配置
388
- ├── README.md # 本文档 v0.4.1
388
+ ├── README.md # 本文档 v0.4.2
389
389
  └── quick-upload.sh # 快速部署脚本
390
390
  ```
391
391
 
@@ -561,4 +561,4 @@ MIT License
561
561
 
562
562
  ---
563
563
 
564
- **Staran v0.4.1** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
564
+ **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -1,4 +1,4 @@
1
- # Star## ✨ v0.4.1 新特性
1
+ # Star## ✨ v0.4.2 新特性
2
2
 
3
3
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
4
4
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -16,7 +16,7 @@
16
16
 
17
17
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
18
18
 
19
- ## ✨ v0.4.1 新特性
19
+ ## ✨ v0.4.2 新特性
20
20
 
21
21
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
22
22
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -335,7 +335,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
335
335
 
336
336
  ```
337
337
  staran/
338
- ├── __init__.py # 主包入口,v0.4.1功能导出
338
+ ├── __init__.py # 主包入口,v0.4.2功能导出
339
339
  ├── schemas/ # 🆕 表结构定义与文档生成模块
340
340
  │ ├── __init__.py # Schema模块入口
341
341
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -359,7 +359,7 @@ staran/
359
359
  │ ├── __init__.py # 工具模块
360
360
  │ └── date.py # Date类实现
361
361
  ├── setup.py # 安装配置
362
- ├── README.md # 本文档 v0.4.1
362
+ ├── README.md # 本文档 v0.4.2
363
363
  └── quick-upload.sh # 快速部署脚本
364
364
  ```
365
365
 
@@ -535,4 +535,4 @@ MIT License
535
535
 
536
536
  ---
537
537
 
538
- **Staran v0.4.1** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
538
+ **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="staran",
5
- version="0.4.1",
5
+ version="0.4.2",
6
6
  description="staran - 高性能Python工具库",
7
7
  long_description=open("README.md", encoding="utf-8").read(),
8
8
  long_description_content_type="text/markdown",
@@ -0,0 +1,306 @@
1
+ """
2
+ AUM业务表结构定义模块
3
+
4
+ 包含AUM (资产管理)业务相关的所有标准表结构:
5
+ - 客户行为特征表 (AUMBehaviorSchema)
6
+ - 资产平均值表 (AUMAssetAvgSchema)
7
+ - 资产配置表 (AUMAssetConfigSchema)
8
+ - 月度统计表 (AUMMonthlyStatSchema)
9
+
10
+ 这些表结构可以用于:
11
+ 1. 特征工程流水线
12
+ 2. 数据模型构建
13
+ 3. 业务文档生成
14
+ 4. 数据质量检查
15
+ """
16
+
17
+ from typing import Dict
18
+ from ...features.schema import TableSchema
19
+ from ..document_generator import SchemaDocumentGenerator
20
+
21
+
22
+ class AUMBehaviorSchema:
23
+ """AUM客户行为特征表 - A表结构定义(严格按照已提供给行方的字段)"""
24
+
25
+ # 统计指标定义 - 与原始定义完全一致
26
+ _STATS = [
27
+ ("max", "最大值"),
28
+ ("min", "最小值"),
29
+ ("sum", "总和"),
30
+ ("avg", "均值"),
31
+ ("var", "方差"),
32
+ ("std", "标准差"),
33
+ ("rng", "极差"),
34
+ ("med", "中位数"),
35
+ ]
36
+
37
+ @staticmethod
38
+ def create() -> TableSchema:
39
+ """创建客户行为特征表结构 - 严格按照已提供给行方的字段定义"""
40
+ schema = TableSchema('bi_hlwj_dfcw_f1_f4_wy')
41
+ schema.add_primary_key('party_id', 'string')
42
+ schema.add_date_field('data_dt', 'string')
43
+
44
+ # 基础字段 - 严格按照原始定义
45
+ schema.add_field("buy_ct", "int", comment="购买次数", aggregatable=True)
46
+ schema.add_field("recency", "int", comment="最近一次购买距今天数", aggregatable=True)
47
+ schema.add_field("tenure", "int", comment="客户关系持续时间", aggregatable=True)
48
+ schema.add_field("window1", "string", comment="时间窗口标记")
49
+ schema.add_field("freq", "float", comment="总购买频率", aggregatable=True)
50
+ schema.add_field("freq1", "float", comment="最近时间段购买频率", aggregatable=True)
51
+ schema.add_field("productidcount", "int", comment="产品种类数", aggregatable=True)
52
+ schema.add_field("orderidcount", "int", comment="订单数", aggregatable=True)
53
+ schema.add_field("label", "float", comment="标签值(如是否购买)", aggregatable=True)
54
+
55
+ # productamount and m1 ~ m4 的含义描述 - 严格按照原始定义
56
+ m_fields = {
57
+ "productamount": "购买金额",
58
+ "m1": "去重订单数",
59
+ "m2": "去重商品数",
60
+ "m3": "去重渠道数",
61
+ "m4": "去重产品品类数",
62
+ }
63
+
64
+ # 使用循环注册 productamount and m1~m4 各统计字段 - 严格按照原始逻辑
65
+ for prefix, meaning in m_fields.items():
66
+ for stat_key, stat_desc in AUMBehaviorSchema._STATS:
67
+ field_name = f"{prefix}_{stat_key}"
68
+ description = f"{meaning}的{stat_desc}"
69
+ schema.add_field(field_name, "float", comment=description, aggregatable=True)
70
+
71
+ # 其他字段 - 严格按照原始定义
72
+ schema.add_field("life_day", "float", comment="客户生命周期天数", aggregatable=True)
73
+ schema.add_field("gender", "float", comment="性别(编码)", aggregatable=True)
74
+ schema.add_field("open_day", "float", comment="开户天数", aggregatable=True)
75
+
76
+ schema.set_monthly_unique(False) # A表每人每日记录
77
+ return schema
78
+ schema.add_field("FREQ_BRANCH_CODE", "string", comment="常用网点代码")
79
+ schema.add_field("CROSS_REGION_TXN", "string", comment="跨地区交易情况")
80
+
81
+ schema.set_monthly_unique(False) # A表每人每天一条记录
82
+ return schema
83
+
84
+
85
+ class AUMAssetAvgSchema:
86
+ """AUM资产平均值表 - B表结构定义(严格按照已提供给行方的字段)"""
87
+
88
+ @staticmethod
89
+ def create() -> TableSchema:
90
+ """创建资产平均值表结构 - 严格按照原始定义"""
91
+ schema = TableSchema('bi_hlwj_zi_chan_avg_wy')
92
+ schema.add_primary_key('party_id', 'string')
93
+ schema.add_date_field('data_dt', 'string')
94
+
95
+ # 基础余额字段 - 严格按照原始定义
96
+ schema.add_field("asset_total_bal", "decimal", comment="总资产余额", aggregatable=True)
97
+ schema.add_field("liab_total_bal", "decimal", comment="总负债余额", aggregatable=True)
98
+ schema.add_field("dpsit_total_bal", "decimal", comment="存款总余额", aggregatable=True)
99
+ schema.add_field("loan_total_bal", "decimal", comment="贷款总余额", aggregatable=True)
100
+ schema.add_field("card_total_bal", "decimal", comment="信用卡总余额", aggregatable=True)
101
+ schema.add_field("mid_busi_total_bal", "decimal", comment="中间业务总余额", aggregatable=True)
102
+
103
+ # Register average balance fields - 严格按照原始逻辑
104
+ for period in ["month", "year", "3", "6", "12"]:
105
+ schema.add_field(
106
+ f"avg_asset_bal_{period}", "decimal", comment=f"平均资产余额 ({period}期)", aggregatable=True
107
+ )
108
+ for period in ["3", "12"]:
109
+ schema.add_field(
110
+ f"avg_dpsit_bal_{period}", "decimal", comment=f"平均存款余额 ({period}期)", aggregatable=True
111
+ )
112
+
113
+ schema.set_monthly_unique(True) # B表每人每月唯一
114
+ return schema
115
+
116
+
117
+ class AUMAssetConfigSchema:
118
+ """AUM资产配置表 - C表结构定义(严格按照已提供给行方的字段)"""
119
+
120
+ @staticmethod
121
+ def create() -> TableSchema:
122
+ """创建资产配置表结构 - 严格按照原始定义"""
123
+ schema = TableSchema('bi_hlwj_zi_chang_month_total_zb')
124
+ schema.add_primary_key('party_id', 'string')
125
+ schema.add_date_field('data_dt', 'string')
126
+
127
+ # asset_fields - 严格按照原始定义
128
+ asset_fields = [
129
+ ("seg_asset_total", "总资产余额"),
130
+ ("indv_consm_loan_amt", "个人消费贷款余额"),
131
+ ("indv_house_loan_amt", "个人住房贷款余额"),
132
+ ("indv_oper_loan_amt", "个人经营贷款余额"),
133
+ ("dpsit_bal", "存款余额"),
134
+ ("tbond_bal", "国债余额"),
135
+ ("fund_bal", "基金余额"),
136
+ ("bond_bal", "债券余额"),
137
+ ("gold_bal", "黄金余额"),
138
+ ("wcurr_chrem_bal", "外币现钞余额"),
139
+ ("presv_mgmt_secu_bal", "保值管理证券余额"),
140
+ ("insure_form_bal", "保险单余额"),
141
+ ("crdt_card_od_bal", "信用卡透支余额"),
142
+ ("crdt_card_con_amt", "信用卡消费金额"),
143
+ ("semi_crdt_card_od_bal", "准贷记卡透支余额"),
144
+ ("semi_crdt_card_con_amt", "准贷记卡消费金额"),
145
+ ("inter_card_con_amt", "国际卡消费金额"),
146
+ ("inter_card_od_bal", "国际卡透支余额"),
147
+ ("crdt_card_dpsit_bal", "信用卡存款余额"),
148
+ ("semi_crdt_card_dpsit_bal", "准贷记卡存款余额"),
149
+ ("inter_card_dpsit_bal", "国际卡存款余额"),
150
+ ("silver_bal", "白银余额"),
151
+ ("agent_solid_silver_bal", "代发实物白银余额"),
152
+ ("pt_bal", "个人养老金余额"),
153
+ ("pd_bal", "个人养老金存款余额"),
154
+ ("other_metal_bal", "其他金属余额"),
155
+ ("curr_dpsit_bal", "活期存款余额"),
156
+ ("time_dpsit_bal", "定期存款余额"),
157
+ ("oil_bal", "石油余额"),
158
+ ("fx_bal", "外汇余额"),
159
+ ]
160
+
161
+ # 严格按照原始循环逻辑注册字段
162
+ for field_name, desc in asset_fields:
163
+ schema.add_field(field_name, "decimal", comment=desc, aggregatable=True)
164
+
165
+ schema.set_monthly_unique(True) # C表每人每月唯一
166
+ return schema
167
+
168
+
169
+ class AUMMonthlyStatSchema:
170
+ """AUM月度统计表 - D表结构定义(严格按照已提供给行方的字段)"""
171
+
172
+ @staticmethod
173
+ def create() -> TableSchema:
174
+ """创建月度统计表结构 - 严格按照原始定义"""
175
+ schema = TableSchema('bi_hlwj_realy_month_stat_wy')
176
+ schema.add_primary_key('party_id', 'string') # 修正主键名称
177
+ schema.add_date_field('data_dt', 'string')
178
+
179
+ # channels字典 - 严格按照原始定义
180
+ channels = {
181
+ "CASH_DEPIST": "现金",
182
+ "REMIT": "汇款",
183
+ "YY": "邮政储蓄",
184
+ "UNIONPAY": "银联",
185
+ "FIN_ASSET": "理财产品",
186
+ "CORP_ACCT": "对公账户",
187
+ }
188
+
189
+ # 注册存入和取出字段 - 严格按照原始逻辑
190
+ for prefix, desc in channels.items():
191
+ schema.add_field(f"{prefix}_IN", "decimal", comment=f"{desc}存入金额", aggregatable=True)
192
+ schema.add_field(f"{prefix}_OUT", "decimal", comment=f"{desc}取出金额", aggregatable=True)
193
+
194
+ # 其他特定字段 - 严格按照原始定义
195
+ schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
196
+ schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
197
+ schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
198
+ schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)
199
+
200
+ # 定义字段结构:交易渠道、指标、时间范围、描述前缀 - 严格按照原始定义
201
+ fields = [
202
+ ("DEBIT_CARD", "借记卡", "MON3"),
203
+ ("CREDIT_CARD", "信用卡", "MON3"),
204
+ ("THIRD_PAYMENT", "第三方支付", "MON3"),
205
+ ("MOBBANK", "手机银行", "MON12"),
206
+ ("TELBANK", "电话银行", "MON12"),
207
+ ]
208
+
209
+ # 定义交易指标 - 严格按照原始定义
210
+ metrics = [
211
+ ("TX_CNT", "交易次数"),
212
+ ("TX_AMT", "交易金额"),
213
+ ]
214
+
215
+ # 自动注册 - 严格按照原始逻辑
216
+ for channel, desc, period in fields:
217
+ for metric_code, metric_desc in metrics:
218
+ field_name = f"{channel}_{metric_code}_{period}"
219
+ description = f"{desc}{metric_desc}(近{period[-2:]}个月)"
220
+ schema.add_field(field_name, "decimal", comment=description, aggregatable=True)
221
+
222
+ # 其他固定字段 - 严格按照原始定义
223
+ schema.add_field(
224
+ "COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True
225
+ )
226
+ schema.add_field(
227
+ "WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True
228
+ )
229
+
230
+ # 编号国家(1~5) - 严格按照原始循环逻辑
231
+ for i in range(1, 6):
232
+ schema.add_field(
233
+ f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True
234
+ )
235
+ schema.add_field(
236
+ f"Y1_OVERS_CNT_CTY{i}_CD",
237
+ "string",
238
+ comment=f"近一年境外国家{i}的交易次数(编码)",
239
+ )
240
+ schema.add_field(
241
+ f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True
242
+ )
243
+ schema.add_field(
244
+ f"Y1_OVERS_AMT_CTY{i}_CD",
245
+ "string",
246
+ comment=f"近一年境外国家{i}的交易金额(编码)",
247
+ )
248
+
249
+ # 其他国家 - 严格按照原始定义
250
+ schema.add_field(
251
+ "Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True
252
+ )
253
+ schema.add_field(
254
+ "Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True
255
+ )
256
+
257
+ schema.set_monthly_unique(True) # D表每人每月唯一
258
+ return schema
259
+
260
+
261
+ def get_aum_schemas() -> Dict[str, TableSchema]:
262
+ """获取所有AUM业务表结构"""
263
+ return {
264
+ 'behavior': AUMBehaviorSchema.create(),
265
+ 'asset_avg': AUMAssetAvgSchema.create(),
266
+ 'asset_config': AUMAssetConfigSchema.create(),
267
+ 'monthly_stat': AUMMonthlyStatSchema.create()
268
+ }
269
+
270
+
271
+ def export_aum_docs(output_dir: str = "./docs", format_type: str = "markdown") -> Dict[str, str]:
272
+ """
273
+ 导出AUM表结构文档
274
+
275
+ Args:
276
+ output_dir: 输出目录
277
+ format_type: 文档格式 ('markdown' 或 'pdf')
278
+
279
+ Returns:
280
+ 生成的文档文件路径字典
281
+ """
282
+ schemas = get_aum_schemas()
283
+ generator = SchemaDocumentGenerator()
284
+
285
+ results = {}
286
+ for table_type, schema in schemas.items():
287
+ file_path = generator.export_schema_doc(
288
+ schema=schema,
289
+ business_domain="AUM",
290
+ table_type=table_type,
291
+ output_dir=output_dir,
292
+ format_type=format_type
293
+ )
294
+ results[table_type] = file_path
295
+
296
+ return results
297
+
298
+
299
+ __all__ = [
300
+ 'AUMBehaviorSchema',
301
+ 'AUMAssetAvgSchema',
302
+ 'AUMAssetConfigSchema',
303
+ 'AUMMonthlyStatSchema',
304
+ 'get_aum_schemas',
305
+ 'export_aum_docs'
306
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: staran
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: staran - 高性能Python工具库
5
5
  Home-page: https://github.com/starlxa/staran
6
6
  Author: StarAn
@@ -24,7 +24,7 @@ Dynamic: requires-dist
24
24
  Dynamic: requires-python
25
25
  Dynamic: summary
26
26
 
27
- # Star## ✨ v0.4.1 新特性
27
+ # Star## ✨ v0.4.2 新特性
28
28
 
29
29
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
30
30
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -42,7 +42,7 @@ Dynamic: summary
42
42
 
43
43
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
44
44
 
45
- ## ✨ v0.4.1 新特性
45
+ ## ✨ v0.4.2 新特性
46
46
 
47
47
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
48
48
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
361
361
 
362
362
  ```
363
363
  staran/
364
- ├── __init__.py # 主包入口,v0.4.1功能导出
364
+ ├── __init__.py # 主包入口,v0.4.2功能导出
365
365
  ├── schemas/ # 🆕 表结构定义与文档生成模块
366
366
  │ ├── __init__.py # Schema模块入口
367
367
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -385,7 +385,7 @@ staran/
385
385
  │ ├── __init__.py # 工具模块
386
386
  │ └── date.py # Date类实现
387
387
  ├── setup.py # 安装配置
388
- ├── README.md # 本文档 v0.4.1
388
+ ├── README.md # 本文档 v0.4.2
389
389
  └── quick-upload.sh # 快速部署脚本
390
390
  ```
391
391
 
@@ -561,4 +561,4 @@ MIT License
561
561
 
562
562
  ---
563
563
 
564
- **Staran v0.4.1** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
564
+ **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -1,314 +0,0 @@
1
- """
2
- AUM业务表结构定义模块
3
-
4
- 包含AUM (资产管理)业务相关的所有标准表结构:
5
- - 客户行为特征表 (AUMBehaviorSchema)
6
- - 资产平均值表 (AUMAssetAvgSchema)
7
- - 资产配置表 (AUMAssetConfigSchema)
8
- - 月度统计表 (AUMMonthlyStatSchema)
9
-
10
- 这些表结构可以用于:
11
- 1. 特征工程流水线
12
- 2. 数据模型构建
13
- 3. 业务文档生成
14
- 4. 数据质量检查
15
- """
16
-
17
- from typing import Dict
18
- from ...features.schema import TableSchema
19
- from ..document_generator import SchemaDocumentGenerator
20
-
21
-
22
- class AUMBehaviorSchema:
23
- """AUM客户行为特征表 - A表结构定义"""
24
-
25
- @staticmethod
26
- def create() -> TableSchema:
27
- """创建客户行为特征表结构"""
28
- schema = TableSchema('bi_hlwj_dfcw_f1_f4_wy')
29
- schema.add_primary_key('party_id', 'string')
30
- schema.add_date_field('data_dt', 'string')
31
-
32
- # 基础信息字段
33
- schema.add_field("AGE", "int", comment="客户年龄", aggregatable=True)
34
- schema.add_field("GENDER", "string", comment="客户性别")
35
- schema.add_field("EDU_LEVEL", "string", comment="教育水平")
36
- schema.add_field("MARITAL_STATUS", "string", comment="婚姻状况")
37
- schema.add_field("INCOME_LEVEL", "string", comment="收入水平")
38
- schema.add_field("OCCUPATION", "string", comment="职业类型")
39
- schema.add_field("CITY_LEVEL", "string", comment="城市等级")
40
-
41
- # 账户信息字段
42
- schema.add_field("ACCT_OPEN_MONTHS", "int", comment="开户月数", aggregatable=True)
43
- schema.add_field("MAIN_ACCT_BAL", "decimal", comment="主账户余额", aggregatable=True)
44
- schema.add_field("ACCT_COUNT", "int", comment="账户总数", aggregatable=True)
45
- schema.add_field("DEPOSIT_ACCT_COUNT", "int", comment="存款账户数", aggregatable=True)
46
- schema.add_field("LOAN_ACCT_COUNT", "int", comment="贷款账户数", aggregatable=True)
47
- schema.add_field("CREDIT_CARD_COUNT", "int", comment="信用卡数量", aggregatable=True)
48
-
49
- # 交易行为字段
50
- schema.add_field("MON3_TXN_COUNT", "int", comment="近3月交易次数", aggregatable=True)
51
- schema.add_field("MON3_TXN_AMT", "decimal", comment="近3月交易金额", aggregatable=True)
52
- schema.add_field("MON6_TXN_COUNT", "int", comment="近6月交易次数", aggregatable=True)
53
- schema.add_field("MON6_TXN_AMT", "decimal", comment="近6月交易金额", aggregatable=True)
54
- schema.add_field("YEAR1_TXN_COUNT", "int", comment="近1年交易次数", aggregatable=True)
55
- schema.add_field("YEAR1_TXN_AMT", "decimal", comment="近1年交易金额", aggregatable=True)
56
-
57
- # 渠道使用偏好
58
- schema.add_field("ONLINE_BANK_USAGE", "string", comment="网银使用频度")
59
- schema.add_field("MOBILE_BANK_USAGE", "string", comment="手机银行使用频度")
60
- schema.add_field("ATM_USAGE", "string", comment="ATM使用频度")
61
- schema.add_field("COUNTER_USAGE", "string", comment="柜台使用频度")
62
-
63
- # 产品持有情况
64
- schema.add_field("WEALTH_PROD_COUNT", "int", comment="理财产品数量", aggregatable=True)
65
- schema.add_field("FUND_PROD_COUNT", "int", comment="基金产品数量", aggregatable=True)
66
- schema.add_field("INSURANCE_PROD_COUNT", "int", comment="保险产品数量", aggregatable=True)
67
- schema.add_field("GOLD_PROD_COUNT", "int", comment="黄金产品数量", aggregatable=True)
68
-
69
- # 风险评级相关
70
- schema.add_field("RISK_LEVEL", "string", comment="风险等级")
71
- schema.add_field("RISK_APPETITE", "string", comment="风险偏好")
72
- schema.add_field("INVESTMENT_EXPERIENCE", "string", comment="投资经验")
73
-
74
- # 服务渠道偏好
75
- schema.add_field("PREFER_CHANNEL", "string", comment="偏好服务渠道")
76
- schema.add_field("CONTACT_TIME_PREFER", "string", comment="联系时间偏好")
77
-
78
- # 客户活跃度指标
79
- schema.add_field("LOGIN_DAYS_MON3", "int", comment="近3月登录天数", aggregatable=True)
80
- schema.add_field("LOGIN_DAYS_MON6", "int", comment="近6月登录天数", aggregatable=True)
81
- schema.add_field("LAST_LOGIN_DAYS", "int", comment="最后登录距今天数", aggregatable=True)
82
- schema.add_field("ACTIVE_LEVEL", "string", comment="活跃度等级")
83
-
84
- # 客户价值指标
85
- schema.add_field("CUSTOMER_VALUE_SCORE", "decimal", comment="客户价值评分", aggregatable=True)
86
- schema.add_field("POTENTIAL_VALUE_SCORE", "decimal", comment="潜在价值评分", aggregatable=True)
87
- schema.add_field("RETENTION_SCORE", "decimal", comment="留存倾向评分", aggregatable=True)
88
-
89
- # 营销响应历史
90
- schema.add_field("CAMPAIGN_RESPONSE_RATE", "decimal", comment="营销响应率", aggregatable=True)
91
- schema.add_field("LAST_CAMPAIGN_RESPONSE", "string", comment="最近营销响应")
92
- schema.add_field("PRODUCT_CROSS_SELL_COUNT", "int", comment="交叉销售产品数", aggregatable=True)
93
-
94
- # 投诉与满意度
95
- schema.add_field("COMPLAINT_COUNT_YEAR1", "int", comment="近1年投诉次数", aggregatable=True)
96
- schema.add_field("SATISFACTION_SCORE", "decimal", comment="满意度评分", aggregatable=True)
97
- schema.add_field("NPS_SCORE", "decimal", comment="净推荐值", aggregatable=True)
98
-
99
- # 地理位置相关
100
- schema.add_field("HOME_BRANCH_CODE", "string", comment="归属网点代码")
101
- schema.add_field("FREQ_BRANCH_CODE", "string", comment="常用网点代码")
102
- schema.add_field("CROSS_REGION_TXN", "string", comment="跨地区交易情况")
103
-
104
- schema.set_monthly_unique(False) # A表每人每天一条记录
105
- return schema
106
-
107
-
108
- class AUMAssetAvgSchema:
109
- """AUM资产平均值表 - B表结构定义"""
110
-
111
- @staticmethod
112
- def create() -> TableSchema:
113
- """创建资产平均值表结构"""
114
- schema = TableSchema('bi_hlwj_zi_chan_avg_wy')
115
- schema.add_primary_key('party_id', 'string')
116
- schema.add_date_field('data_dt', 'string')
117
-
118
- # 各类资产平均余额
119
- schema.add_field("TOTAL_ASSET_AVG", "decimal", comment="总资产平均值", aggregatable=True)
120
- schema.add_field("DEPOSIT_AVG", "decimal", comment="存款平均余额", aggregatable=True)
121
- schema.add_field("CURRENT_DEPOSIT_AVG", "decimal", comment="活期存款平均余额", aggregatable=True)
122
- schema.add_field("TIME_DEPOSIT_AVG", "decimal", comment="定期存款平均余额", aggregatable=True)
123
- schema.add_field("WEALTH_PRODUCT_AVG", "decimal", comment="理财产品平均余额", aggregatable=True)
124
- schema.add_field("FUND_ASSET_AVG", "decimal", comment="基金资产平均值", aggregatable=True)
125
- schema.add_field("INSURANCE_ASSET_AVG", "decimal", comment="保险资产平均值", aggregatable=True)
126
- schema.add_field("BOND_ASSET_AVG", "decimal", comment="债券资产平均值", aggregatable=True)
127
- schema.add_field("STOCK_ASSET_AVG", "decimal", comment="股票资产平均值", aggregatable=True)
128
- schema.add_field("GOLD_ASSET_AVG", "decimal", comment="黄金资产平均值", aggregatable=True)
129
- schema.add_field("FOREX_ASSET_AVG", "decimal", comment="外汇资产平均值", aggregatable=True)
130
-
131
- # 负债相关平均值
132
- schema.add_field("TOTAL_DEBT_AVG", "decimal", comment="总负债平均值", aggregatable=True)
133
- schema.add_field("MORTGAGE_DEBT_AVG", "decimal", comment="房贷平均余额", aggregatable=True)
134
- schema.add_field("CREDIT_CARD_DEBT_AVG", "decimal", comment="信用卡负债平均值", aggregatable=True)
135
- schema.add_field("OTHER_LOAN_AVG", "decimal", comment="其他贷款平均余额", aggregatable=True)
136
-
137
- schema.set_monthly_unique(True) # B表每人每月唯一
138
- return schema
139
-
140
-
141
- class AUMAssetConfigSchema:
142
- """AUM资产配置表 - C表结构定义"""
143
-
144
- @staticmethod
145
- def create() -> TableSchema:
146
- """创建资产配置表结构"""
147
- schema = TableSchema('bi_hlwj_zi_chang_month_total_zb')
148
- schema.add_primary_key('party_id', 'string')
149
- schema.add_date_field('data_dt', 'string')
150
-
151
- # 资产配置比例
152
- schema.add_field("DEPOSIT_RATIO", "decimal", comment="存款资产占比", aggregatable=True)
153
- schema.add_field("WEALTH_RATIO", "decimal", comment="理财产品占比", aggregatable=True)
154
- schema.add_field("FUND_RATIO", "decimal", comment="基金资产占比", aggregatable=True)
155
- schema.add_field("INSURANCE_RATIO", "decimal", comment="保险资产占比", aggregatable=True)
156
- schema.add_field("BOND_RATIO", "decimal", comment="债券资产占比", aggregatable=True)
157
- schema.add_field("STOCK_RATIO", "decimal", comment="股票资产占比", aggregatable=True)
158
- schema.add_field("GOLD_RATIO", "decimal", comment="黄金资产占比", aggregatable=True)
159
- schema.add_field("FOREX_RATIO", "decimal", comment="外汇资产占比", aggregatable=True)
160
-
161
- # 风险资产vs安全资产配置
162
- schema.add_field("HIGH_RISK_RATIO", "decimal", comment="高风险资产占比", aggregatable=True)
163
- schema.add_field("MEDIUM_RISK_RATIO", "decimal", comment="中风险资产占比", aggregatable=True)
164
- schema.add_field("LOW_RISK_RATIO", "decimal", comment="低风险资产占比", aggregatable=True)
165
- schema.add_field("SAFE_ASSET_RATIO", "decimal", comment="安全资产占比", aggregatable=True)
166
-
167
- # 流动性配置
168
- schema.add_field("HIGH_LIQUIDITY_RATIO", "decimal", comment="高流动性资产占比", aggregatable=True)
169
- schema.add_field("MEDIUM_LIQUIDITY_RATIO", "decimal", comment="中流动性资产占比", aggregatable=True)
170
- schema.add_field("LOW_LIQUIDITY_RATIO", "decimal", comment="低流动性资产占比", aggregatable=True)
171
-
172
- # 期限结构配置
173
- schema.add_field("SHORT_TERM_RATIO", "decimal", comment="短期资产占比", aggregatable=True)
174
- schema.add_field("MEDIUM_TERM_RATIO", "decimal", comment="中期资产占比", aggregatable=True)
175
- schema.add_field("LONG_TERM_RATIO", "decimal", comment="长期资产占比", aggregatable=True)
176
-
177
- # 货币配置
178
- schema.add_field("RMB_ASSET_RATIO", "decimal", comment="人民币资产占比", aggregatable=True)
179
- schema.add_field("USD_ASSET_RATIO", "decimal", comment="美元资产占比", aggregatable=True)
180
- schema.add_field("EUR_ASSET_RATIO", "decimal", comment="欧元资产占比", aggregatable=True)
181
- schema.add_field("OTHER_CURRENCY_RATIO", "decimal", comment="其他货币资产占比", aggregatable=True)
182
-
183
- # 配置集中度指标
184
- schema.add_field("ASSET_CONCENTRATION_INDEX", "decimal", comment="资产集中度指数", aggregatable=True)
185
- schema.add_field("DIVERSIFICATION_SCORE", "decimal", comment="分散化程度评分", aggregatable=True)
186
-
187
- # 动态配置指标
188
- schema.add_field("CONFIG_CHANGE_FREQ", "int", comment="配置调整频率", aggregatable=True)
189
- schema.add_field("LAST_REBALANCE_DAYS", "int", comment="最后再平衡距今天数", aggregatable=True)
190
-
191
- # 配置绩效相关
192
- schema.add_field("CONFIG_RETURN_RATE", "decimal", comment="配置收益率", aggregatable=True)
193
- schema.add_field("RISK_ADJUSTED_RETURN", "decimal", comment="风险调整收益", aggregatable=True)
194
- schema.add_field("SHARPE_RATIO", "decimal", comment="夏普比率", aggregatable=True)
195
-
196
- # 配置建议相关
197
- schema.add_field("OPTIMAL_CONFIG_SCORE", "decimal", comment="最优配置评分", aggregatable=True)
198
- schema.add_field("CONFIG_IMPROVEMENT_POTENTIAL", "decimal", comment="配置优化潜力", aggregatable=True)
199
-
200
- schema.set_monthly_unique(True) # C表每人每月唯一
201
- return schema
202
-
203
-
204
- class AUMMonthlyStatSchema:
205
- """AUM月度统计表 - D表结构定义"""
206
-
207
- @staticmethod
208
- def create() -> TableSchema:
209
- """创建月度统计表结构"""
210
- schema = TableSchema('bi_hlwj_realy_month_stat_wy')
211
- schema.add_primary_key('party_dt', 'string') # 注意这个表的主键是party_dt
212
- schema.add_date_field('data_dt', 'string')
213
-
214
- # 渠道存取款字段
215
- channels = {
216
- "CASH_DEPIST": "现金",
217
- "REMIT": "汇款",
218
- "YY": "邮政储蓄",
219
- "UNIONPAY": "银联",
220
- "FIN_ASSET": "理财产品",
221
- "CORP_ACCT": "对公账户"
222
- }
223
-
224
- for prefix, desc in channels.items():
225
- schema.add_field(f"{prefix}_IN", "decimal", comment=f"{desc}存入金额", aggregatable=True)
226
- schema.add_field(f"{prefix}_OUT", "decimal", comment=f"{desc}取出金额", aggregatable=True)
227
-
228
- # 其他存取款字段
229
- schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
230
- schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
231
- schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
232
- schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)
233
-
234
- # 交易渠道指标字段
235
- fields = [
236
- ("DEBIT_CARD", "借记卡", "MON3"),
237
- ("CREDIT_CARD", "信用卡", "MON3"),
238
- ("THIRD_PAYMENT", "第三方支付", "MON3"),
239
- ("MOBBANK", "手机银行", "MON12"),
240
- ("TELBANK", "电话银行", "MON12")
241
- ]
242
-
243
- metrics = [("TX_CNT", "交易次数"), ("TX_AMT", "交易金额")]
244
-
245
- for channel, desc, period in fields:
246
- for metric_code, metric_desc in metrics:
247
- field_name = f"{channel}_{metric_code}_{period}"
248
- description = f"{desc}{metric_desc}(近{period[-2:]}个月)"
249
- schema.add_field(field_name, "decimal", comment=description, aggregatable=True)
250
-
251
- # 其他交易字段
252
- schema.add_field("COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True)
253
- schema.add_field("WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True)
254
-
255
- # 境外交易字段
256
- for i in range(1, 6):
257
- schema.add_field(f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True)
258
- schema.add_field(f"Y1_OVERS_CNT_CTY{i}_CD", "string", comment=f"近一年境外国家{i}的交易次数(编码)")
259
- schema.add_field(f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True)
260
- schema.add_field(f"Y1_OVERS_AMT_CTY{i}_CD", "string", comment=f"近一年境外国家{i}的交易金额(编码)")
261
-
262
- schema.add_field("Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True)
263
- schema.add_field("Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True)
264
-
265
- schema.set_monthly_unique(True) # D表每人每月唯一
266
- return schema
267
-
268
-
269
- def get_aum_schemas() -> Dict[str, TableSchema]:
270
- """获取所有AUM业务表结构"""
271
- return {
272
- 'behavior': AUMBehaviorSchema.create(),
273
- 'asset_avg': AUMAssetAvgSchema.create(),
274
- 'asset_config': AUMAssetConfigSchema.create(),
275
- 'monthly_stat': AUMMonthlyStatSchema.create()
276
- }
277
-
278
-
279
- def export_aum_docs(output_dir: str = "./docs", format_type: str = "markdown") -> Dict[str, str]:
280
- """
281
- 导出AUM表结构文档
282
-
283
- Args:
284
- output_dir: 输出目录
285
- format_type: 文档格式 ('markdown' 或 'pdf')
286
-
287
- Returns:
288
- 生成的文档文件路径字典
289
- """
290
- schemas = get_aum_schemas()
291
- generator = SchemaDocumentGenerator()
292
-
293
- results = {}
294
- for table_type, schema in schemas.items():
295
- file_path = generator.export_schema_doc(
296
- schema=schema,
297
- business_domain="AUM",
298
- table_type=table_type,
299
- output_dir=output_dir,
300
- format_type=format_type
301
- )
302
- results[table_type] = file_path
303
-
304
- return results
305
-
306
-
307
- __all__ = [
308
- 'AUMBehaviorSchema',
309
- 'AUMAssetAvgSchema',
310
- 'AUMAssetConfigSchema',
311
- 'AUMMonthlyStatSchema',
312
- 'get_aum_schemas',
313
- 'export_aum_docs'
314
- ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes