staran 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,26 +1,29 @@
1
1
  """
2
- AUM业务表结构定义模块
2
+ 新疆工行代发长尾客户表结构定义模块
3
3
 
4
- 包含AUM (资产管理)业务相关的所有标准表结构:
5
- - 客户行为特征表 (AUMBehaviorSchema)
6
- - 资产平均值表 (AUMAssetAvgSchema)
7
- - 资产配置表 (AUMAssetConfigSchema)
8
- - 月度统计表 (AUMMonthlyStatSchema)
4
+ 包含新疆工行代发长尾客户相关的所有表结构:
5
+ - 代发长尾客户行为特征表 (XinjiangICBCDaifaLongtailBehaviorSchema)
6
+ - 代发长尾客户资产平均值表 (XinjiangICBCDaifaLongtailAssetAvgSchema)
7
+ - 代发长尾客户资产配置表 (XinjiangICBCDaifaLongtailAssetConfigSchema)
8
+ - 代发长尾客户月度统计表 (XinjiangICBCDaifaLongtailMonthlyStatSchema)
9
+
10
+ 数据库: xinjiang_icbc_daifa_longtail
11
+ 业务范围: 代发长尾客户(资产10k-100k)
9
12
 
10
13
  这些表结构可以用于:
11
- 1. 特征工程流水线
12
- 2. 数据模型构建
14
+ 1. 代发长尾客户特征工程
15
+ 2. 提升模型和防流失模型构建
13
16
  3. 业务文档生成
14
17
  4. 数据质量检查
15
18
  """
16
19
 
17
20
  from typing import Dict
18
21
  from ...features.schema import TableSchema
19
- from ..document_generator import SchemaDocumentGenerator
22
+ from ...tools.document_generator import SchemaDocumentGenerator
20
23
 
21
24
 
22
- class AUMBehaviorSchema:
23
- """AUM客户行为特征表 - A表结构定义(严格按照已提供给行方的字段)"""
25
+ class XinjiangICBCDaifaLongtailBehaviorSchema:
26
+ """新疆工行代发长尾客户行为特征表 - 严格按照已提供给行方的字段"""
24
27
 
25
28
  # 统计指标定义 - 与原始定义完全一致
26
29
  _STATS = [
@@ -36,8 +39,8 @@ class AUMBehaviorSchema:
36
39
 
37
40
  @staticmethod
38
41
  def create() -> TableSchema:
39
- """创建客户行为特征表结构 - 严格按照已提供给行方的字段定义"""
40
- schema = TableSchema('bi_hlwj_dfcw_f1_f4_wy')
42
+ """创建新疆工行代发长尾客户行为特征表结构"""
43
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_dfcw_f1_f4_wy')
41
44
  schema.add_primary_key('party_id', 'string')
42
45
  schema.add_date_field('data_dt', 'string')
43
46
 
@@ -52,7 +55,7 @@ class AUMBehaviorSchema:
52
55
  schema.add_field("orderidcount", "int", comment="订单数", aggregatable=True)
53
56
  schema.add_field("label", "float", comment="标签值(如是否购买)", aggregatable=True)
54
57
 
55
- # productamount and m1 ~ m4 的含义描述 - 严格按照原始定义
58
+ # productamount and m1 ~ m4 的含义描述
56
59
  m_fields = {
57
60
  "productamount": "购买金额",
58
61
  "m1": "去重订单数",
@@ -61,246 +64,147 @@ class AUMBehaviorSchema:
61
64
  "m4": "去重产品品类数",
62
65
  }
63
66
 
64
- # 使用循环注册 productamount and m1~m4 各统计字段 - 严格按照原始逻辑
67
+ # 使用循环注册 productamount and m1~m4 各统计字段
65
68
  for prefix, meaning in m_fields.items():
66
- for stat_key, stat_desc in AUMBehaviorSchema._STATS:
69
+ for stat_key, stat_desc in XinjiangICBCDaifaLongtailBehaviorSchema._STATS:
67
70
  field_name = f"{prefix}_{stat_key}"
68
71
  description = f"{meaning}的{stat_desc}"
69
72
  schema.add_field(field_name, "float", comment=description, aggregatable=True)
70
73
 
71
- # 其他字段 - 严格按照原始定义
74
+ # 其他字段
72
75
  schema.add_field("life_day", "float", comment="客户生命周期天数", aggregatable=True)
73
76
  schema.add_field("gender", "float", comment="性别(编码)", aggregatable=True)
74
77
  schema.add_field("open_day", "float", comment="开户天数", aggregatable=True)
75
78
 
76
- schema.set_monthly_unique(False) # A表每人每日记录
77
- return schema
78
- schema.add_field("FREQ_BRANCH_CODE", "string", comment="常用网点代码")
79
- schema.add_field("CROSS_REGION_TXN", "string", comment="跨地区交易情况")
80
-
81
- schema.set_monthly_unique(False) # A表每人每天一条记录
79
+ schema.set_monthly_unique(False) # 每人每日记录
82
80
  return schema
83
81
 
84
82
 
85
- class AUMAssetAvgSchema:
86
- """AUM资产平均值表 - B表结构定义(严格按照已提供给行方的字段)"""
83
+ class XinjiangICBCDaifaLongtailAssetAvgSchema:
84
+ """新疆工行代发长尾客户资产平均值表"""
87
85
 
88
86
  @staticmethod
89
87
  def create() -> TableSchema:
90
- """创建资产平均值表结构 - 严格按照原始定义"""
91
- schema = TableSchema('bi_hlwj_zi_chan_avg_wy')
88
+ """创建新疆工行代发长尾客户资产平均值表结构"""
89
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_zi_chan_avg_wy')
92
90
  schema.add_primary_key('party_id', 'string')
93
91
  schema.add_date_field('data_dt', 'string')
94
92
 
95
- # 基础余额字段 - 严格按照原始定义
93
+ # 基础余额字段
96
94
  schema.add_field("asset_total_bal", "decimal", comment="总资产余额", aggregatable=True)
97
95
  schema.add_field("liab_total_bal", "decimal", comment="总负债余额", aggregatable=True)
98
- schema.add_field("dpsit_total_bal", "decimal", comment="存款总余额", aggregatable=True)
99
- schema.add_field("loan_total_bal", "decimal", comment="贷款总余额", aggregatable=True)
100
- schema.add_field("card_total_bal", "decimal", comment="信用卡总余额", aggregatable=True)
101
- schema.add_field("mid_busi_total_bal", "decimal", comment="中间业务总余额", aggregatable=True)
102
-
103
- # Register average balance fields - 严格按照原始逻辑
104
- for period in ["month", "year", "3", "6", "12"]:
105
- schema.add_field(
106
- f"avg_asset_bal_{period}", "decimal", comment=f"平均资产余额 ({period}期)", aggregatable=True
107
- )
108
- for period in ["3", "12"]:
109
- schema.add_field(
110
- f"avg_dpsit_bal_{period}", "decimal", comment=f"平均存款余额 ({period}期)", aggregatable=True
111
- )
96
+ schema.add_field("net_asset_bal", "decimal", comment="净资产余额", aggregatable=True)
112
97
 
113
- schema.set_monthly_unique(True) # B表每人每月唯一
98
+ # 存款相关字段
99
+ schema.add_field("dep_bal", "decimal", comment="存款余额", aggregatable=True)
100
+ schema.add_field("current_dep_bal", "decimal", comment="活期存款余额", aggregatable=True)
101
+ schema.add_field("time_dep_bal", "decimal", comment="定期存款余额", aggregatable=True)
102
+
103
+ # 理财投资字段
104
+ schema.add_field("wealth_bal", "decimal", comment="理财余额", aggregatable=True)
105
+ schema.add_field("fund_bal", "decimal", comment="基金余额", aggregatable=True)
106
+ schema.add_field("insurance_bal", "decimal", comment="保险余额", aggregatable=True)
107
+
108
+ schema.set_monthly_unique(True) # 每人每月一条记录
114
109
  return schema
115
110
 
116
111
 
117
- class AUMAssetConfigSchema:
118
- """AUM资产配置表 - C表结构定义(严格按照已提供给行方的字段)"""
112
+ class XinjiangICBCDaifaLongtailAssetConfigSchema:
113
+ """新疆工行代发长尾客户资产配置表"""
119
114
 
120
115
  @staticmethod
121
116
  def create() -> TableSchema:
122
- """创建资产配置表结构 - 严格按照原始定义"""
123
- schema = TableSchema('bi_hlwj_zi_chang_month_total_zb')
117
+ """创建新疆工行代发长尾客户资产配置表结构"""
118
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_zi_chan_config_wy')
124
119
  schema.add_primary_key('party_id', 'string')
125
120
  schema.add_date_field('data_dt', 'string')
126
121
 
127
- # asset_fields - 严格按照原始定义
128
- asset_fields = [
129
- ("seg_asset_total", "总资产余额"),
130
- ("indv_consm_loan_amt", "个人消费贷款余额"),
131
- ("indv_house_loan_amt", "个人住房贷款余额"),
132
- ("indv_oper_loan_amt", "个人经营贷款余额"),
133
- ("dpsit_bal", "存款余额"),
134
- ("tbond_bal", "国债余额"),
135
- ("fund_bal", "基金余额"),
136
- ("bond_bal", "债券余额"),
137
- ("gold_bal", "黄金余额"),
138
- ("wcurr_chrem_bal", "外币现钞余额"),
139
- ("presv_mgmt_secu_bal", "保值管理证券余额"),
140
- ("insure_form_bal", "保险单余额"),
141
- ("crdt_card_od_bal", "信用卡透支余额"),
142
- ("crdt_card_con_amt", "信用卡消费金额"),
143
- ("semi_crdt_card_od_bal", "准贷记卡透支余额"),
144
- ("semi_crdt_card_con_amt", "准贷记卡消费金额"),
145
- ("inter_card_con_amt", "国际卡消费金额"),
146
- ("inter_card_od_bal", "国际卡透支余额"),
147
- ("crdt_card_dpsit_bal", "信用卡存款余额"),
148
- ("semi_crdt_card_dpsit_bal", "准贷记卡存款余额"),
149
- ("inter_card_dpsit_bal", "国际卡存款余额"),
150
- ("silver_bal", "白银余额"),
151
- ("agent_solid_silver_bal", "代发实物白银余额"),
152
- ("pt_bal", "个人养老金余额"),
153
- ("pd_bal", "个人养老金存款余额"),
154
- ("other_metal_bal", "其他金属余额"),
155
- ("curr_dpsit_bal", "活期存款余额"),
156
- ("time_dpsit_bal", "定期存款余额"),
157
- ("oil_bal", "石油余额"),
158
- ("fx_bal", "外汇余额"),
159
- ]
160
-
161
- # 严格按照原始循环逻辑注册字段
162
- for field_name, desc in asset_fields:
163
- schema.add_field(field_name, "decimal", comment=desc, aggregatable=True)
122
+ # 资产配置比例字段
123
+ schema.add_field("cash_ratio", "float", comment="现金类资产占比", aggregatable=True)
124
+ schema.add_field("fixed_income_ratio", "float", comment="固收类资产占比", aggregatable=True)
125
+ schema.add_field("equity_ratio", "float", comment="权益类资产占比", aggregatable=True)
126
+ schema.add_field("alternative_ratio", "float", comment="另类资产占比", aggregatable=True)
164
127
 
165
- schema.set_monthly_unique(True) # C表每人每月唯一
128
+ # 风险偏好相关
129
+ schema.add_field("risk_level", "int", comment="风险偏好等级(1-5)", aggregatable=True)
130
+ schema.add_field("investment_experience", "int", comment="投资经验年限", aggregatable=True)
131
+
132
+ # 配置变化指标
133
+ schema.add_field("config_change_freq", "int", comment="配置调整频率", aggregatable=True)
134
+ schema.add_field("rebalance_count", "int", comment="再平衡次数", aggregatable=True)
135
+
136
+ schema.set_monthly_unique(True)
166
137
  return schema
167
138
 
168
139
 
169
- class AUMMonthlyStatSchema:
170
- """AUM月度统计表 - D表结构定义(严格按照已提供给行方的字段)"""
140
+ class XinjiangICBCDaifaLongtailMonthlyStatSchema:
141
+ """新疆工行代发长尾客户月度统计表"""
171
142
 
172
143
  @staticmethod
173
144
  def create() -> TableSchema:
174
- """创建月度统计表结构 - 严格按照原始定义"""
175
- schema = TableSchema('bi_hlwj_realy_month_stat_wy')
176
- schema.add_primary_key('party_id', 'string') # 修正主键名称
145
+ """创建新疆工行代发长尾客户月度统计表结构"""
146
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_monthly_stat_wy')
147
+ schema.add_primary_key('party_id', 'string')
177
148
  schema.add_date_field('data_dt', 'string')
178
149
 
179
- # channels字典 - 严格按照原始定义
180
- channels = {
181
- "CASH_DEPIST": "现金",
182
- "REMIT": "汇款",
183
- "YY": "邮政储蓄",
184
- "UNIONPAY": "银联",
185
- "FIN_ASSET": "理财产品",
186
- "CORP_ACCT": "对公账户",
187
- }
188
-
189
- # 注册存入和取出字段 - 严格按照原始逻辑
190
- for prefix, desc in channels.items():
191
- schema.add_field(f"{prefix}_IN", "decimal", comment=f"{desc}存入金额", aggregatable=True)
192
- schema.add_field(f"{prefix}_OUT", "decimal", comment=f"{desc}取出金额", aggregatable=True)
193
-
194
- # 其他特定字段 - 严格按照原始定义
195
- schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
196
- schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
197
- schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
198
- schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)
199
-
200
- # 定义字段结构:交易渠道、指标、时间范围、描述前缀 - 严格按照原始定义
201
- fields = [
202
- ("DEBIT_CARD", "借记卡", "MON3"),
203
- ("CREDIT_CARD", "信用卡", "MON3"),
204
- ("THIRD_PAYMENT", "第三方支付", "MON3"),
205
- ("MOBBANK", "手机银行", "MON12"),
206
- ("TELBANK", "电话银行", "MON12"),
207
- ]
208
-
209
- # 定义交易指标 - 严格按照原始定义
210
- metrics = [
211
- ("TX_CNT", "交易次数"),
212
- ("TX_AMT", "交易金额"),
213
- ]
214
-
215
- # 自动注册 - 严格按照原始逻辑
216
- for channel, desc, period in fields:
217
- for metric_code, metric_desc in metrics:
218
- field_name = f"{channel}_{metric_code}_{period}"
219
- description = f"{desc}{metric_desc}(近{period[-2:]}个月)"
220
- schema.add_field(field_name, "decimal", comment=description, aggregatable=True)
221
-
222
- # 其他固定字段 - 严格按照原始定义
223
- schema.add_field(
224
- "COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True
225
- )
226
- schema.add_field(
227
- "WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True
228
- )
229
-
230
- # 编号国家(1~5) - 严格按照原始循环逻辑
231
- for i in range(1, 6):
232
- schema.add_field(
233
- f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True
234
- )
235
- schema.add_field(
236
- f"Y1_OVERS_CNT_CTY{i}_CD",
237
- "string",
238
- comment=f"近一年境外国家{i}的交易次数(编码)",
239
- )
240
- schema.add_field(
241
- f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True
242
- )
243
- schema.add_field(
244
- f"Y1_OVERS_AMT_CTY{i}_CD",
245
- "string",
246
- comment=f"近一年境外国家{i}的交易金额(编码)",
247
- )
248
-
249
- # 其他国家 - 严格按照原始定义
250
- schema.add_field(
251
- "Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True
252
- )
253
- schema.add_field(
254
- "Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True
255
- )
150
+ # 月度交易统计
151
+ schema.add_field("monthly_txn_count", "int", comment="月度交易笔数", aggregatable=True)
152
+ schema.add_field("monthly_txn_amount", "decimal", comment="月度交易金额", aggregatable=True)
153
+ schema.add_field("monthly_deposit_amount", "decimal", comment="月度存入金额", aggregatable=True)
154
+ schema.add_field("monthly_withdraw_amount", "decimal", comment="月度取出金额", aggregatable=True)
155
+
156
+ # 代发工资相关统计
157
+ schema.add_field("salary_amount", "decimal", comment="月度代发工资金额", aggregatable=True)
158
+ schema.add_field("salary_date", "string", comment="代发工资日期")
159
+ schema.add_field("salary_stability", "float", comment="工资稳定性指数", aggregatable=True)
160
+
161
+ # 长尾客户特征
162
+ schema.add_field("longtail_score", "float", comment="长尾客户评分", aggregatable=True)
163
+ schema.add_field("upgrade_potential", "float", comment="提升潜力评分", aggregatable=True)
164
+ schema.add_field("churn_risk", "float", comment="流失风险评分", aggregatable=True)
256
165
 
257
- schema.set_monthly_unique(True) # D表每人每月唯一
166
+ # 活跃度指标
167
+ schema.add_field("login_days", "int", comment="月度登录天数", aggregatable=True)
168
+ schema.add_field("channel_usage", "string", comment="渠道使用情况")
169
+
170
+ schema.set_monthly_unique(True)
258
171
  return schema
259
172
 
260
173
 
261
- def get_aum_schemas() -> Dict[str, TableSchema]:
262
- """获取所有AUM业务表结构"""
174
+ def get_xinjiang_icbc_daifa_longtail_schemas() -> Dict[str, TableSchema]:
175
+ """获取新疆工行代发长尾客户所有表结构"""
263
176
  return {
264
- 'behavior': AUMBehaviorSchema.create(),
265
- 'asset_avg': AUMAssetAvgSchema.create(),
266
- 'asset_config': AUMAssetConfigSchema.create(),
267
- 'monthly_stat': AUMMonthlyStatSchema.create()
177
+ 'daifa_longtail_behavior': XinjiangICBCDaifaLongtailBehaviorSchema.create(),
178
+ 'daifa_longtail_asset_avg': XinjiangICBCDaifaLongtailAssetAvgSchema.create(),
179
+ 'daifa_longtail_asset_config': XinjiangICBCDaifaLongtailAssetConfigSchema.create(),
180
+ 'daifa_longtail_monthly_stat': XinjiangICBCDaifaLongtailMonthlyStatSchema.create(),
268
181
  }
269
182
 
270
183
 
271
- def export_aum_docs(output_dir: str = "./docs", format_type: str = "markdown") -> Dict[str, str]:
272
- """
273
- 导出AUM表结构文档
274
-
275
- Args:
276
- output_dir: 输出目录
277
- format_type: 文档格式 ('markdown' 或 'pdf')
278
-
279
- Returns:
280
- 生成的文档文件路径字典
281
- """
282
- schemas = get_aum_schemas()
184
+ def export_xinjiang_icbc_daifa_longtail_docs(output_dir: str = "./docs") -> Dict[str, str]:
185
+ """导出新疆工行代发长尾客户表结构文档"""
283
186
  generator = SchemaDocumentGenerator()
187
+ schemas = get_xinjiang_icbc_daifa_longtail_schemas()
188
+ exported_files = {}
284
189
 
285
- results = {}
286
190
  for table_type, schema in schemas.items():
287
191
  file_path = generator.export_schema_doc(
288
- schema=schema,
289
- business_domain="AUM",
192
+ schema,
193
+ business_domain="新疆工行代发长尾客户",
290
194
  table_type=table_type,
291
- output_dir=output_dir,
292
- format_type=format_type
195
+ output_dir=output_dir
293
196
  )
294
- results[table_type] = file_path
197
+ exported_files[table_type] = file_path
295
198
 
296
- return results
199
+ return exported_files
297
200
 
298
201
 
202
+ # 导出主要组件
299
203
  __all__ = [
300
- 'AUMBehaviorSchema',
301
- 'AUMAssetAvgSchema',
302
- 'AUMAssetConfigSchema',
303
- 'AUMMonthlyStatSchema',
304
- 'get_aum_schemas',
305
- 'export_aum_docs'
204
+ 'XinjiangICBCDaifaLongtailBehaviorSchema',
205
+ 'XinjiangICBCDaifaLongtailAssetAvgSchema',
206
+ 'XinjiangICBCDaifaLongtailAssetConfigSchema',
207
+ 'XinjiangICBCDaifaLongtailMonthlyStatSchema',
208
+ 'get_xinjiang_icbc_daifa_longtail_schemas',
209
+ 'export_xinjiang_icbc_daifa_longtail_docs'
306
210
  ]