staran 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- staran/__init__.py +0 -261
- staran/banks/__init__.py +30 -0
- staran/banks/xinjiang_icbc/__init__.py +90 -0
- staran/models/__init__.py +72 -0
- staran/models/bank_configs.py +269 -0
- staran/models/config.py +271 -0
- staran/models/daifa_models.py +361 -0
- staran/models/registry.py +281 -0
- staran/models/target.py +321 -0
- staran/schemas/__init__.py +14 -15
- staran/schemas/aum/__init__.py +103 -199
- staran/tools/document_generator.py +350 -0
- {staran-0.4.2.dist-info → staran-0.6.0.dist-info}/METADATA +6 -6
- {staran-0.4.2.dist-info → staran-0.6.0.dist-info}/RECORD +17 -8
- {staran-0.4.2.dist-info → staran-0.6.0.dist-info}/WHEEL +0 -0
- {staran-0.4.2.dist-info → staran-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {staran-0.4.2.dist-info → staran-0.6.0.dist-info}/top_level.txt +0 -0
staran/schemas/aum/__init__.py
CHANGED
@@ -1,26 +1,29 @@
|
|
1
1
|
"""
|
2
|
-
|
2
|
+
新疆工行代发长尾客户表结构定义模块
|
3
3
|
|
4
|
-
|
5
|
-
-
|
6
|
-
-
|
7
|
-
-
|
8
|
-
-
|
4
|
+
包含新疆工行代发长尾客户相关的所有表结构:
|
5
|
+
- 代发长尾客户行为特征表 (XinjiangICBCDaifaLongtailBehaviorSchema)
|
6
|
+
- 代发长尾客户资产平均值表 (XinjiangICBCDaifaLongtailAssetAvgSchema)
|
7
|
+
- 代发长尾客户资产配置表 (XinjiangICBCDaifaLongtailAssetConfigSchema)
|
8
|
+
- 代发长尾客户月度统计表 (XinjiangICBCDaifaLongtailMonthlyStatSchema)
|
9
|
+
|
10
|
+
数据库: xinjiang_icbc_daifa_longtail
|
11
|
+
业务范围: 代发长尾客户(资产10k-100k)
|
9
12
|
|
10
13
|
这些表结构可以用于:
|
11
|
-
1.
|
12
|
-
2.
|
14
|
+
1. 代发长尾客户特征工程
|
15
|
+
2. 提升模型和防流失模型构建
|
13
16
|
3. 业务文档生成
|
14
17
|
4. 数据质量检查
|
15
18
|
"""
|
16
19
|
|
17
20
|
from typing import Dict
|
18
21
|
from ...features.schema import TableSchema
|
19
|
-
from
|
22
|
+
from ...tools.document_generator import SchemaDocumentGenerator
|
20
23
|
|
21
24
|
|
22
|
-
class
|
23
|
-
"""
|
25
|
+
class XinjiangICBCDaifaLongtailBehaviorSchema:
|
26
|
+
"""新疆工行代发长尾客户行为特征表 - 严格按照已提供给行方的字段"""
|
24
27
|
|
25
28
|
# 统计指标定义 - 与原始定义完全一致
|
26
29
|
_STATS = [
|
@@ -36,8 +39,8 @@ class AUMBehaviorSchema:
|
|
36
39
|
|
37
40
|
@staticmethod
|
38
41
|
def create() -> TableSchema:
|
39
|
-
"""
|
40
|
-
schema = TableSchema('
|
42
|
+
"""创建新疆工行代发长尾客户行为特征表结构"""
|
43
|
+
schema = TableSchema('xinjiang_icbc_daifa_hlwj_dfcw_f1_f4_wy')
|
41
44
|
schema.add_primary_key('party_id', 'string')
|
42
45
|
schema.add_date_field('data_dt', 'string')
|
43
46
|
|
@@ -52,7 +55,7 @@ class AUMBehaviorSchema:
|
|
52
55
|
schema.add_field("orderidcount", "int", comment="订单数", aggregatable=True)
|
53
56
|
schema.add_field("label", "float", comment="标签值(如是否购买)", aggregatable=True)
|
54
57
|
|
55
|
-
# productamount and m1 ~ m4 的含义描述
|
58
|
+
# productamount and m1 ~ m4 的含义描述
|
56
59
|
m_fields = {
|
57
60
|
"productamount": "购买金额",
|
58
61
|
"m1": "去重订单数",
|
@@ -61,246 +64,147 @@ class AUMBehaviorSchema:
|
|
61
64
|
"m4": "去重产品品类数",
|
62
65
|
}
|
63
66
|
|
64
|
-
# 使用循环注册 productamount and m1~m4 各统计字段
|
67
|
+
# 使用循环注册 productamount and m1~m4 各统计字段
|
65
68
|
for prefix, meaning in m_fields.items():
|
66
|
-
for stat_key, stat_desc in
|
69
|
+
for stat_key, stat_desc in XinjiangICBCDaifaLongtailBehaviorSchema._STATS:
|
67
70
|
field_name = f"{prefix}_{stat_key}"
|
68
71
|
description = f"{meaning}的{stat_desc}"
|
69
72
|
schema.add_field(field_name, "float", comment=description, aggregatable=True)
|
70
73
|
|
71
|
-
# 其他字段
|
74
|
+
# 其他字段
|
72
75
|
schema.add_field("life_day", "float", comment="客户生命周期天数", aggregatable=True)
|
73
76
|
schema.add_field("gender", "float", comment="性别(编码)", aggregatable=True)
|
74
77
|
schema.add_field("open_day", "float", comment="开户天数", aggregatable=True)
|
75
78
|
|
76
|
-
schema.set_monthly_unique(False) #
|
77
|
-
return schema
|
78
|
-
schema.add_field("FREQ_BRANCH_CODE", "string", comment="常用网点代码")
|
79
|
-
schema.add_field("CROSS_REGION_TXN", "string", comment="跨地区交易情况")
|
80
|
-
|
81
|
-
schema.set_monthly_unique(False) # A表每人每天一条记录
|
79
|
+
schema.set_monthly_unique(False) # 每人每日记录
|
82
80
|
return schema
|
83
81
|
|
84
82
|
|
85
|
-
class
|
86
|
-
"""
|
83
|
+
class XinjiangICBCDaifaLongtailAssetAvgSchema:
|
84
|
+
"""新疆工行代发长尾客户资产平均值表"""
|
87
85
|
|
88
86
|
@staticmethod
|
89
87
|
def create() -> TableSchema:
|
90
|
-
"""
|
91
|
-
schema = TableSchema('
|
88
|
+
"""创建新疆工行代发长尾客户资产平均值表结构"""
|
89
|
+
schema = TableSchema('xinjiang_icbc_daifa_hlwj_zi_chan_avg_wy')
|
92
90
|
schema.add_primary_key('party_id', 'string')
|
93
91
|
schema.add_date_field('data_dt', 'string')
|
94
92
|
|
95
|
-
# 基础余额字段
|
93
|
+
# 基础余额字段
|
96
94
|
schema.add_field("asset_total_bal", "decimal", comment="总资产余额", aggregatable=True)
|
97
95
|
schema.add_field("liab_total_bal", "decimal", comment="总负债余额", aggregatable=True)
|
98
|
-
schema.add_field("
|
99
|
-
schema.add_field("loan_total_bal", "decimal", comment="贷款总余额", aggregatable=True)
|
100
|
-
schema.add_field("card_total_bal", "decimal", comment="信用卡总余额", aggregatable=True)
|
101
|
-
schema.add_field("mid_busi_total_bal", "decimal", comment="中间业务总余额", aggregatable=True)
|
102
|
-
|
103
|
-
# Register average balance fields - 严格按照原始逻辑
|
104
|
-
for period in ["month", "year", "3", "6", "12"]:
|
105
|
-
schema.add_field(
|
106
|
-
f"avg_asset_bal_{period}", "decimal", comment=f"平均资产余额 ({period}期)", aggregatable=True
|
107
|
-
)
|
108
|
-
for period in ["3", "12"]:
|
109
|
-
schema.add_field(
|
110
|
-
f"avg_dpsit_bal_{period}", "decimal", comment=f"平均存款余额 ({period}期)", aggregatable=True
|
111
|
-
)
|
96
|
+
schema.add_field("net_asset_bal", "decimal", comment="净资产余额", aggregatable=True)
|
112
97
|
|
113
|
-
|
98
|
+
# 存款相关字段
|
99
|
+
schema.add_field("dep_bal", "decimal", comment="存款余额", aggregatable=True)
|
100
|
+
schema.add_field("current_dep_bal", "decimal", comment="活期存款余额", aggregatable=True)
|
101
|
+
schema.add_field("time_dep_bal", "decimal", comment="定期存款余额", aggregatable=True)
|
102
|
+
|
103
|
+
# 理财投资字段
|
104
|
+
schema.add_field("wealth_bal", "decimal", comment="理财余额", aggregatable=True)
|
105
|
+
schema.add_field("fund_bal", "decimal", comment="基金余额", aggregatable=True)
|
106
|
+
schema.add_field("insurance_bal", "decimal", comment="保险余额", aggregatable=True)
|
107
|
+
|
108
|
+
schema.set_monthly_unique(True) # 每人每月一条记录
|
114
109
|
return schema
|
115
110
|
|
116
111
|
|
117
|
-
class
|
118
|
-
"""
|
112
|
+
class XinjiangICBCDaifaLongtailAssetConfigSchema:
|
113
|
+
"""新疆工行代发长尾客户资产配置表"""
|
119
114
|
|
120
115
|
@staticmethod
|
121
116
|
def create() -> TableSchema:
|
122
|
-
"""
|
123
|
-
schema = TableSchema('
|
117
|
+
"""创建新疆工行代发长尾客户资产配置表结构"""
|
118
|
+
schema = TableSchema('xinjiang_icbc_daifa_hlwj_zi_chan_config_wy')
|
124
119
|
schema.add_primary_key('party_id', 'string')
|
125
120
|
schema.add_date_field('data_dt', 'string')
|
126
121
|
|
127
|
-
#
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
("indv_oper_loan_amt", "个人经营贷款余额"),
|
133
|
-
("dpsit_bal", "存款余额"),
|
134
|
-
("tbond_bal", "国债余额"),
|
135
|
-
("fund_bal", "基金余额"),
|
136
|
-
("bond_bal", "债券余额"),
|
137
|
-
("gold_bal", "黄金余额"),
|
138
|
-
("wcurr_chrem_bal", "外币现钞余额"),
|
139
|
-
("presv_mgmt_secu_bal", "保值管理证券余额"),
|
140
|
-
("insure_form_bal", "保险单余额"),
|
141
|
-
("crdt_card_od_bal", "信用卡透支余额"),
|
142
|
-
("crdt_card_con_amt", "信用卡消费金额"),
|
143
|
-
("semi_crdt_card_od_bal", "准贷记卡透支余额"),
|
144
|
-
("semi_crdt_card_con_amt", "准贷记卡消费金额"),
|
145
|
-
("inter_card_con_amt", "国际卡消费金额"),
|
146
|
-
("inter_card_od_bal", "国际卡透支余额"),
|
147
|
-
("crdt_card_dpsit_bal", "信用卡存款余额"),
|
148
|
-
("semi_crdt_card_dpsit_bal", "准贷记卡存款余额"),
|
149
|
-
("inter_card_dpsit_bal", "国际卡存款余额"),
|
150
|
-
("silver_bal", "白银余额"),
|
151
|
-
("agent_solid_silver_bal", "代发实物白银余额"),
|
152
|
-
("pt_bal", "个人养老金余额"),
|
153
|
-
("pd_bal", "个人养老金存款余额"),
|
154
|
-
("other_metal_bal", "其他金属余额"),
|
155
|
-
("curr_dpsit_bal", "活期存款余额"),
|
156
|
-
("time_dpsit_bal", "定期存款余额"),
|
157
|
-
("oil_bal", "石油余额"),
|
158
|
-
("fx_bal", "外汇余额"),
|
159
|
-
]
|
160
|
-
|
161
|
-
# 严格按照原始循环逻辑注册字段
|
162
|
-
for field_name, desc in asset_fields:
|
163
|
-
schema.add_field(field_name, "decimal", comment=desc, aggregatable=True)
|
122
|
+
# 资产配置比例字段
|
123
|
+
schema.add_field("cash_ratio", "float", comment="现金类资产占比", aggregatable=True)
|
124
|
+
schema.add_field("fixed_income_ratio", "float", comment="固收类资产占比", aggregatable=True)
|
125
|
+
schema.add_field("equity_ratio", "float", comment="权益类资产占比", aggregatable=True)
|
126
|
+
schema.add_field("alternative_ratio", "float", comment="另类资产占比", aggregatable=True)
|
164
127
|
|
165
|
-
|
128
|
+
# 风险偏好相关
|
129
|
+
schema.add_field("risk_level", "int", comment="风险偏好等级(1-5)", aggregatable=True)
|
130
|
+
schema.add_field("investment_experience", "int", comment="投资经验年限", aggregatable=True)
|
131
|
+
|
132
|
+
# 配置变化指标
|
133
|
+
schema.add_field("config_change_freq", "int", comment="配置调整频率", aggregatable=True)
|
134
|
+
schema.add_field("rebalance_count", "int", comment="再平衡次数", aggregatable=True)
|
135
|
+
|
136
|
+
schema.set_monthly_unique(True)
|
166
137
|
return schema
|
167
138
|
|
168
139
|
|
169
|
-
class
|
170
|
-
"""
|
140
|
+
class XinjiangICBCDaifaLongtailMonthlyStatSchema:
|
141
|
+
"""新疆工行代发长尾客户月度统计表"""
|
171
142
|
|
172
143
|
@staticmethod
|
173
144
|
def create() -> TableSchema:
|
174
|
-
"""
|
175
|
-
schema = TableSchema('
|
176
|
-
schema.add_primary_key('party_id', 'string')
|
145
|
+
"""创建新疆工行代发长尾客户月度统计表结构"""
|
146
|
+
schema = TableSchema('xinjiang_icbc_daifa_hlwj_monthly_stat_wy')
|
147
|
+
schema.add_primary_key('party_id', 'string')
|
177
148
|
schema.add_date_field('data_dt', 'string')
|
178
149
|
|
179
|
-
#
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
# 其他特定字段 - 严格按照原始定义
|
195
|
-
schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
|
196
|
-
schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
|
197
|
-
schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
|
198
|
-
schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)
|
199
|
-
|
200
|
-
# 定义字段结构:交易渠道、指标、时间范围、描述前缀 - 严格按照原始定义
|
201
|
-
fields = [
|
202
|
-
("DEBIT_CARD", "借记卡", "MON3"),
|
203
|
-
("CREDIT_CARD", "信用卡", "MON3"),
|
204
|
-
("THIRD_PAYMENT", "第三方支付", "MON3"),
|
205
|
-
("MOBBANK", "手机银行", "MON12"),
|
206
|
-
("TELBANK", "电话银行", "MON12"),
|
207
|
-
]
|
208
|
-
|
209
|
-
# 定义交易指标 - 严格按照原始定义
|
210
|
-
metrics = [
|
211
|
-
("TX_CNT", "交易次数"),
|
212
|
-
("TX_AMT", "交易金额"),
|
213
|
-
]
|
214
|
-
|
215
|
-
# 自动注册 - 严格按照原始逻辑
|
216
|
-
for channel, desc, period in fields:
|
217
|
-
for metric_code, metric_desc in metrics:
|
218
|
-
field_name = f"{channel}_{metric_code}_{period}"
|
219
|
-
description = f"{desc}{metric_desc}(近{period[-2:]}个月)"
|
220
|
-
schema.add_field(field_name, "decimal", comment=description, aggregatable=True)
|
221
|
-
|
222
|
-
# 其他固定字段 - 严格按照原始定义
|
223
|
-
schema.add_field(
|
224
|
-
"COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True
|
225
|
-
)
|
226
|
-
schema.add_field(
|
227
|
-
"WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True
|
228
|
-
)
|
229
|
-
|
230
|
-
# 编号国家(1~5) - 严格按照原始循环逻辑
|
231
|
-
for i in range(1, 6):
|
232
|
-
schema.add_field(
|
233
|
-
f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True
|
234
|
-
)
|
235
|
-
schema.add_field(
|
236
|
-
f"Y1_OVERS_CNT_CTY{i}_CD",
|
237
|
-
"string",
|
238
|
-
comment=f"近一年境外国家{i}的交易次数(编码)",
|
239
|
-
)
|
240
|
-
schema.add_field(
|
241
|
-
f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True
|
242
|
-
)
|
243
|
-
schema.add_field(
|
244
|
-
f"Y1_OVERS_AMT_CTY{i}_CD",
|
245
|
-
"string",
|
246
|
-
comment=f"近一年境外国家{i}的交易金额(编码)",
|
247
|
-
)
|
248
|
-
|
249
|
-
# 其他国家 - 严格按照原始定义
|
250
|
-
schema.add_field(
|
251
|
-
"Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True
|
252
|
-
)
|
253
|
-
schema.add_field(
|
254
|
-
"Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True
|
255
|
-
)
|
150
|
+
# 月度交易统计
|
151
|
+
schema.add_field("monthly_txn_count", "int", comment="月度交易笔数", aggregatable=True)
|
152
|
+
schema.add_field("monthly_txn_amount", "decimal", comment="月度交易金额", aggregatable=True)
|
153
|
+
schema.add_field("monthly_deposit_amount", "decimal", comment="月度存入金额", aggregatable=True)
|
154
|
+
schema.add_field("monthly_withdraw_amount", "decimal", comment="月度取出金额", aggregatable=True)
|
155
|
+
|
156
|
+
# 代发工资相关统计
|
157
|
+
schema.add_field("salary_amount", "decimal", comment="月度代发工资金额", aggregatable=True)
|
158
|
+
schema.add_field("salary_date", "string", comment="代发工资日期")
|
159
|
+
schema.add_field("salary_stability", "float", comment="工资稳定性指数", aggregatable=True)
|
160
|
+
|
161
|
+
# 长尾客户特征
|
162
|
+
schema.add_field("longtail_score", "float", comment="长尾客户评分", aggregatable=True)
|
163
|
+
schema.add_field("upgrade_potential", "float", comment="提升潜力评分", aggregatable=True)
|
164
|
+
schema.add_field("churn_risk", "float", comment="流失风险评分", aggregatable=True)
|
256
165
|
|
257
|
-
|
166
|
+
# 活跃度指标
|
167
|
+
schema.add_field("login_days", "int", comment="月度登录天数", aggregatable=True)
|
168
|
+
schema.add_field("channel_usage", "string", comment="渠道使用情况")
|
169
|
+
|
170
|
+
schema.set_monthly_unique(True)
|
258
171
|
return schema
|
259
172
|
|
260
173
|
|
261
|
-
def
|
262
|
-
"""
|
174
|
+
def get_xinjiang_icbc_daifa_longtail_schemas() -> Dict[str, TableSchema]:
|
175
|
+
"""获取新疆工行代发长尾客户所有表结构"""
|
263
176
|
return {
|
264
|
-
'
|
265
|
-
'
|
266
|
-
'
|
267
|
-
'
|
177
|
+
'daifa_longtail_behavior': XinjiangICBCDaifaLongtailBehaviorSchema.create(),
|
178
|
+
'daifa_longtail_asset_avg': XinjiangICBCDaifaLongtailAssetAvgSchema.create(),
|
179
|
+
'daifa_longtail_asset_config': XinjiangICBCDaifaLongtailAssetConfigSchema.create(),
|
180
|
+
'daifa_longtail_monthly_stat': XinjiangICBCDaifaLongtailMonthlyStatSchema.create(),
|
268
181
|
}
|
269
182
|
|
270
183
|
|
271
|
-
def
|
272
|
-
"""
|
273
|
-
导出AUM表结构文档
|
274
|
-
|
275
|
-
Args:
|
276
|
-
output_dir: 输出目录
|
277
|
-
format_type: 文档格式 ('markdown' 或 'pdf')
|
278
|
-
|
279
|
-
Returns:
|
280
|
-
生成的文档文件路径字典
|
281
|
-
"""
|
282
|
-
schemas = get_aum_schemas()
|
184
|
+
def export_xinjiang_icbc_daifa_longtail_docs(output_dir: str = "./docs") -> Dict[str, str]:
|
185
|
+
"""导出新疆工行代发长尾客户表结构文档"""
|
283
186
|
generator = SchemaDocumentGenerator()
|
187
|
+
schemas = get_xinjiang_icbc_daifa_longtail_schemas()
|
188
|
+
exported_files = {}
|
284
189
|
|
285
|
-
results = {}
|
286
190
|
for table_type, schema in schemas.items():
|
287
191
|
file_path = generator.export_schema_doc(
|
288
|
-
schema
|
289
|
-
business_domain="
|
192
|
+
schema,
|
193
|
+
business_domain="新疆工行代发长尾客户",
|
290
194
|
table_type=table_type,
|
291
|
-
output_dir=output_dir
|
292
|
-
format_type=format_type
|
195
|
+
output_dir=output_dir
|
293
196
|
)
|
294
|
-
|
197
|
+
exported_files[table_type] = file_path
|
295
198
|
|
296
|
-
return
|
199
|
+
return exported_files
|
297
200
|
|
298
201
|
|
202
|
+
# 导出主要组件
|
299
203
|
__all__ = [
|
300
|
-
'
|
301
|
-
'
|
302
|
-
'
|
303
|
-
'
|
304
|
-
'
|
305
|
-
'
|
204
|
+
'XinjiangICBCDaifaLongtailBehaviorSchema',
|
205
|
+
'XinjiangICBCDaifaLongtailAssetAvgSchema',
|
206
|
+
'XinjiangICBCDaifaLongtailAssetConfigSchema',
|
207
|
+
'XinjiangICBCDaifaLongtailMonthlyStatSchema',
|
208
|
+
'get_xinjiang_icbc_daifa_longtail_schemas',
|
209
|
+
'export_xinjiang_icbc_daifa_longtail_docs'
|
306
210
|
]
|