staran 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,84 +20,61 @@ from ..document_generator import SchemaDocumentGenerator
20
20
 
21
21
 
22
22
  class AUMBehaviorSchema:
23
- """AUM客户行为特征表 - A表结构定义"""
23
+ """AUM客户行为特征表 - A表结构定义(严格按照已提供给行方的字段)"""
24
+
25
+ # 统计指标定义 - 与原始定义完全一致
26
+ _STATS = [
27
+ ("max", "最大值"),
28
+ ("min", "最小值"),
29
+ ("sum", "总和"),
30
+ ("avg", "均值"),
31
+ ("var", "方差"),
32
+ ("std", "标准差"),
33
+ ("rng", "极差"),
34
+ ("med", "中位数"),
35
+ ]
24
36
 
25
37
  @staticmethod
26
38
  def create() -> TableSchema:
27
- """创建客户行为特征表结构"""
39
+ """创建客户行为特征表结构 - 严格按照已提供给行方的字段定义"""
28
40
  schema = TableSchema('bi_hlwj_dfcw_f1_f4_wy')
29
41
  schema.add_primary_key('party_id', 'string')
30
42
  schema.add_date_field('data_dt', 'string')
31
43
 
32
- # 基础信息字段
33
- schema.add_field("AGE", "int", comment="客户年龄", aggregatable=True)
34
- schema.add_field("GENDER", "string", comment="客户性别")
35
- schema.add_field("EDU_LEVEL", "string", comment="教育水平")
36
- schema.add_field("MARITAL_STATUS", "string", comment="婚姻状况")
37
- schema.add_field("INCOME_LEVEL", "string", comment="收入水平")
38
- schema.add_field("OCCUPATION", "string", comment="职业类型")
39
- schema.add_field("CITY_LEVEL", "string", comment="城市等级")
40
-
41
- # 账户信息字段
42
- schema.add_field("ACCT_OPEN_MONTHS", "int", comment="开户月数", aggregatable=True)
43
- schema.add_field("MAIN_ACCT_BAL", "decimal", comment="主账户余额", aggregatable=True)
44
- schema.add_field("ACCT_COUNT", "int", comment="账户总数", aggregatable=True)
45
- schema.add_field("DEPOSIT_ACCT_COUNT", "int", comment="存款账户数", aggregatable=True)
46
- schema.add_field("LOAN_ACCT_COUNT", "int", comment="贷款账户数", aggregatable=True)
47
- schema.add_field("CREDIT_CARD_COUNT", "int", comment="信用卡数量", aggregatable=True)
48
-
49
- # 交易行为字段
50
- schema.add_field("MON3_TXN_COUNT", "int", comment="近3月交易次数", aggregatable=True)
51
- schema.add_field("MON3_TXN_AMT", "decimal", comment="近3月交易金额", aggregatable=True)
52
- schema.add_field("MON6_TXN_COUNT", "int", comment="近6月交易次数", aggregatable=True)
53
- schema.add_field("MON6_TXN_AMT", "decimal", comment="近6月交易金额", aggregatable=True)
54
- schema.add_field("YEAR1_TXN_COUNT", "int", comment="近1年交易次数", aggregatable=True)
55
- schema.add_field("YEAR1_TXN_AMT", "decimal", comment="近1年交易金额", aggregatable=True)
56
-
57
- # 渠道使用偏好
58
- schema.add_field("ONLINE_BANK_USAGE", "string", comment="网银使用频度")
59
- schema.add_field("MOBILE_BANK_USAGE", "string", comment="手机银行使用频度")
60
- schema.add_field("ATM_USAGE", "string", comment="ATM使用频度")
61
- schema.add_field("COUNTER_USAGE", "string", comment="柜台使用频度")
62
-
63
- # 产品持有情况
64
- schema.add_field("WEALTH_PROD_COUNT", "int", comment="理财产品数量", aggregatable=True)
65
- schema.add_field("FUND_PROD_COUNT", "int", comment="基金产品数量", aggregatable=True)
66
- schema.add_field("INSURANCE_PROD_COUNT", "int", comment="保险产品数量", aggregatable=True)
67
- schema.add_field("GOLD_PROD_COUNT", "int", comment="黄金产品数量", aggregatable=True)
68
-
69
- # 风险评级相关
70
- schema.add_field("RISK_LEVEL", "string", comment="风险等级")
71
- schema.add_field("RISK_APPETITE", "string", comment="风险偏好")
72
- schema.add_field("INVESTMENT_EXPERIENCE", "string", comment="投资经验")
73
-
74
- # 服务渠道偏好
75
- schema.add_field("PREFER_CHANNEL", "string", comment="偏好服务渠道")
76
- schema.add_field("CONTACT_TIME_PREFER", "string", comment="联系时间偏好")
77
-
78
- # 客户活跃度指标
79
- schema.add_field("LOGIN_DAYS_MON3", "int", comment="近3月登录天数", aggregatable=True)
80
- schema.add_field("LOGIN_DAYS_MON6", "int", comment="近6月登录天数", aggregatable=True)
81
- schema.add_field("LAST_LOGIN_DAYS", "int", comment="最后登录距今天数", aggregatable=True)
82
- schema.add_field("ACTIVE_LEVEL", "string", comment="活跃度等级")
83
-
84
- # 客户价值指标
85
- schema.add_field("CUSTOMER_VALUE_SCORE", "decimal", comment="客户价值评分", aggregatable=True)
86
- schema.add_field("POTENTIAL_VALUE_SCORE", "decimal", comment="潜在价值评分", aggregatable=True)
87
- schema.add_field("RETENTION_SCORE", "decimal", comment="留存倾向评分", aggregatable=True)
88
-
89
- # 营销响应历史
90
- schema.add_field("CAMPAIGN_RESPONSE_RATE", "decimal", comment="营销响应率", aggregatable=True)
91
- schema.add_field("LAST_CAMPAIGN_RESPONSE", "string", comment="最近营销响应")
92
- schema.add_field("PRODUCT_CROSS_SELL_COUNT", "int", comment="交叉销售产品数", aggregatable=True)
93
-
94
- # 投诉与满意度
95
- schema.add_field("COMPLAINT_COUNT_YEAR1", "int", comment="近1年投诉次数", aggregatable=True)
96
- schema.add_field("SATISFACTION_SCORE", "decimal", comment="满意度评分", aggregatable=True)
97
- schema.add_field("NPS_SCORE", "decimal", comment="净推荐值", aggregatable=True)
44
+ # 基础字段 - 严格按照原始定义
45
+ schema.add_field("buy_ct", "int", comment="购买次数", aggregatable=True)
46
+ schema.add_field("recency", "int", comment="最近一次购买距今天数", aggregatable=True)
47
+ schema.add_field("tenure", "int", comment="客户关系持续时间", aggregatable=True)
48
+ schema.add_field("window1", "string", comment="时间窗口标记")
49
+ schema.add_field("freq", "float", comment="总购买频率", aggregatable=True)
50
+ schema.add_field("freq1", "float", comment="最近时间段购买频率", aggregatable=True)
51
+ schema.add_field("productidcount", "int", comment="产品种类数", aggregatable=True)
52
+ schema.add_field("orderidcount", "int", comment="订单数", aggregatable=True)
53
+ schema.add_field("label", "float", comment="标签值(如是否购买)", aggregatable=True)
54
+
55
+ # productamount and m1 ~ m4 的含义描述 - 严格按照原始定义
56
+ m_fields = {
57
+ "productamount": "购买金额",
58
+ "m1": "去重订单数",
59
+ "m2": "去重商品数",
60
+ "m3": "去重渠道数",
61
+ "m4": "去重产品品类数",
62
+ }
63
+
64
+ # 使用循环注册 productamount and m1~m4 各统计字段 - 严格按照原始逻辑
65
+ for prefix, meaning in m_fields.items():
66
+ for stat_key, stat_desc in AUMBehaviorSchema._STATS:
67
+ field_name = f"{prefix}_{stat_key}"
68
+ description = f"{meaning}的{stat_desc}"
69
+ schema.add_field(field_name, "float", comment=description, aggregatable=True)
70
+
71
+ # 其他字段 - 严格按照原始定义
72
+ schema.add_field("life_day", "float", comment="客户生命周期天数", aggregatable=True)
73
+ schema.add_field("gender", "float", comment="性别(编码)", aggregatable=True)
74
+ schema.add_field("open_day", "float", comment="开户天数", aggregatable=True)
98
75
 
99
- # 地理位置相关
100
- schema.add_field("HOME_BRANCH_CODE", "string", comment="归属网点代码")
76
+ schema.set_monthly_unique(False) # A表每人每日记录
77
+ return schema
101
78
  schema.add_field("FREQ_BRANCH_CODE", "string", comment="常用网点代码")
102
79
  schema.add_field("CROSS_REGION_TXN", "string", comment="跨地区交易情况")
103
80
 
@@ -106,161 +83,176 @@ class AUMBehaviorSchema:
106
83
 
107
84
 
108
85
  class AUMAssetAvgSchema:
109
- """AUM资产平均值表 - B表结构定义"""
86
+ """AUM资产平均值表 - B表结构定义(严格按照已提供给行方的字段)"""
110
87
 
111
88
  @staticmethod
112
89
  def create() -> TableSchema:
113
- """创建资产平均值表结构"""
90
+ """创建资产平均值表结构 - 严格按照原始定义"""
114
91
  schema = TableSchema('bi_hlwj_zi_chan_avg_wy')
115
92
  schema.add_primary_key('party_id', 'string')
116
93
  schema.add_date_field('data_dt', 'string')
117
94
 
118
- # 各类资产平均余额
119
- schema.add_field("TOTAL_ASSET_AVG", "decimal", comment="总资产平均值", aggregatable=True)
120
- schema.add_field("DEPOSIT_AVG", "decimal", comment="存款平均余额", aggregatable=True)
121
- schema.add_field("CURRENT_DEPOSIT_AVG", "decimal", comment="活期存款平均余额", aggregatable=True)
122
- schema.add_field("TIME_DEPOSIT_AVG", "decimal", comment="定期存款平均余额", aggregatable=True)
123
- schema.add_field("WEALTH_PRODUCT_AVG", "decimal", comment="理财产品平均余额", aggregatable=True)
124
- schema.add_field("FUND_ASSET_AVG", "decimal", comment="基金资产平均值", aggregatable=True)
125
- schema.add_field("INSURANCE_ASSET_AVG", "decimal", comment="保险资产平均值", aggregatable=True)
126
- schema.add_field("BOND_ASSET_AVG", "decimal", comment="债券资产平均值", aggregatable=True)
127
- schema.add_field("STOCK_ASSET_AVG", "decimal", comment="股票资产平均值", aggregatable=True)
128
- schema.add_field("GOLD_ASSET_AVG", "decimal", comment="黄金资产平均值", aggregatable=True)
129
- schema.add_field("FOREX_ASSET_AVG", "decimal", comment="外汇资产平均值", aggregatable=True)
130
-
131
- # 负债相关平均值
132
- schema.add_field("TOTAL_DEBT_AVG", "decimal", comment="总负债平均值", aggregatable=True)
133
- schema.add_field("MORTGAGE_DEBT_AVG", "decimal", comment="房贷平均余额", aggregatable=True)
134
- schema.add_field("CREDIT_CARD_DEBT_AVG", "decimal", comment="信用卡负债平均值", aggregatable=True)
135
- schema.add_field("OTHER_LOAN_AVG", "decimal", comment="其他贷款平均余额", aggregatable=True)
95
+ # 基础余额字段 - 严格按照原始定义
96
+ schema.add_field("asset_total_bal", "decimal", comment="总资产余额", aggregatable=True)
97
+ schema.add_field("liab_total_bal", "decimal", comment="总负债余额", aggregatable=True)
98
+ schema.add_field("dpsit_total_bal", "decimal", comment="存款总余额", aggregatable=True)
99
+ schema.add_field("loan_total_bal", "decimal", comment="贷款总余额", aggregatable=True)
100
+ schema.add_field("card_total_bal", "decimal", comment="信用卡总余额", aggregatable=True)
101
+ schema.add_field("mid_busi_total_bal", "decimal", comment="中间业务总余额", aggregatable=True)
102
+
103
+ # Register average balance fields - 严格按照原始逻辑
104
+ for period in ["month", "year", "3", "6", "12"]:
105
+ schema.add_field(
106
+ f"avg_asset_bal_{period}", "decimal", comment=f"平均资产余额 ({period}期)", aggregatable=True
107
+ )
108
+ for period in ["3", "12"]:
109
+ schema.add_field(
110
+ f"avg_dpsit_bal_{period}", "decimal", comment=f"平均存款余额 ({period}期)", aggregatable=True
111
+ )
136
112
 
137
113
  schema.set_monthly_unique(True) # B表每人每月唯一
138
114
  return schema
139
115
 
140
116
 
141
117
  class AUMAssetConfigSchema:
142
- """AUM资产配置表 - C表结构定义"""
118
+ """AUM资产配置表 - C表结构定义(严格按照已提供给行方的字段)"""
143
119
 
144
120
  @staticmethod
145
121
  def create() -> TableSchema:
146
- """创建资产配置表结构"""
122
+ """创建资产配置表结构 - 严格按照原始定义"""
147
123
  schema = TableSchema('bi_hlwj_zi_chang_month_total_zb')
148
124
  schema.add_primary_key('party_id', 'string')
149
125
  schema.add_date_field('data_dt', 'string')
150
126
 
151
- # 资产配置比例
152
- schema.add_field("DEPOSIT_RATIO", "decimal", comment="存款资产占比", aggregatable=True)
153
- schema.add_field("WEALTH_RATIO", "decimal", comment="理财产品占比", aggregatable=True)
154
- schema.add_field("FUND_RATIO", "decimal", comment="基金资产占比", aggregatable=True)
155
- schema.add_field("INSURANCE_RATIO", "decimal", comment="保险资产占比", aggregatable=True)
156
- schema.add_field("BOND_RATIO", "decimal", comment="债券资产占比", aggregatable=True)
157
- schema.add_field("STOCK_RATIO", "decimal", comment="股票资产占比", aggregatable=True)
158
- schema.add_field("GOLD_RATIO", "decimal", comment="黄金资产占比", aggregatable=True)
159
- schema.add_field("FOREX_RATIO", "decimal", comment="外汇资产占比", aggregatable=True)
160
-
161
- # 风险资产vs安全资产配置
162
- schema.add_field("HIGH_RISK_RATIO", "decimal", comment="高风险资产占比", aggregatable=True)
163
- schema.add_field("MEDIUM_RISK_RATIO", "decimal", comment="中风险资产占比", aggregatable=True)
164
- schema.add_field("LOW_RISK_RATIO", "decimal", comment="低风险资产占比", aggregatable=True)
165
- schema.add_field("SAFE_ASSET_RATIO", "decimal", comment="安全资产占比", aggregatable=True)
166
-
167
- # 流动性配置
168
- schema.add_field("HIGH_LIQUIDITY_RATIO", "decimal", comment="高流动性资产占比", aggregatable=True)
169
- schema.add_field("MEDIUM_LIQUIDITY_RATIO", "decimal", comment="中流动性资产占比", aggregatable=True)
170
- schema.add_field("LOW_LIQUIDITY_RATIO", "decimal", comment="低流动性资产占比", aggregatable=True)
171
-
172
- # 期限结构配置
173
- schema.add_field("SHORT_TERM_RATIO", "decimal", comment="短期资产占比", aggregatable=True)
174
- schema.add_field("MEDIUM_TERM_RATIO", "decimal", comment="中期资产占比", aggregatable=True)
175
- schema.add_field("LONG_TERM_RATIO", "decimal", comment="长期资产占比", aggregatable=True)
176
-
177
- # 货币配置
178
- schema.add_field("RMB_ASSET_RATIO", "decimal", comment="人民币资产占比", aggregatable=True)
179
- schema.add_field("USD_ASSET_RATIO", "decimal", comment="美元资产占比", aggregatable=True)
180
- schema.add_field("EUR_ASSET_RATIO", "decimal", comment="欧元资产占比", aggregatable=True)
181
- schema.add_field("OTHER_CURRENCY_RATIO", "decimal", comment="其他货币资产占比", aggregatable=True)
182
-
183
- # 配置集中度指标
184
- schema.add_field("ASSET_CONCENTRATION_INDEX", "decimal", comment="资产集中度指数", aggregatable=True)
185
- schema.add_field("DIVERSIFICATION_SCORE", "decimal", comment="分散化程度评分", aggregatable=True)
186
-
187
- # 动态配置指标
188
- schema.add_field("CONFIG_CHANGE_FREQ", "int", comment="配置调整频率", aggregatable=True)
189
- schema.add_field("LAST_REBALANCE_DAYS", "int", comment="最后再平衡距今天数", aggregatable=True)
190
-
191
- # 配置绩效相关
192
- schema.add_field("CONFIG_RETURN_RATE", "decimal", comment="配置收益率", aggregatable=True)
193
- schema.add_field("RISK_ADJUSTED_RETURN", "decimal", comment="风险调整收益", aggregatable=True)
194
- schema.add_field("SHARPE_RATIO", "decimal", comment="夏普比率", aggregatable=True)
195
-
196
- # 配置建议相关
197
- schema.add_field("OPTIMAL_CONFIG_SCORE", "decimal", comment="最优配置评分", aggregatable=True)
198
- schema.add_field("CONFIG_IMPROVEMENT_POTENTIAL", "decimal", comment="配置优化潜力", aggregatable=True)
127
+ # asset_fields - 严格按照原始定义
128
+ asset_fields = [
129
+ ("seg_asset_total", "总资产余额"),
130
+ ("indv_consm_loan_amt", "个人消费贷款余额"),
131
+ ("indv_house_loan_amt", "个人住房贷款余额"),
132
+ ("indv_oper_loan_amt", "个人经营贷款余额"),
133
+ ("dpsit_bal", "存款余额"),
134
+ ("tbond_bal", "国债余额"),
135
+ ("fund_bal", "基金余额"),
136
+ ("bond_bal", "债券余额"),
137
+ ("gold_bal", "黄金余额"),
138
+ ("wcurr_chrem_bal", "外币现钞余额"),
139
+ ("presv_mgmt_secu_bal", "保值管理证券余额"),
140
+ ("insure_form_bal", "保险单余额"),
141
+ ("crdt_card_od_bal", "信用卡透支余额"),
142
+ ("crdt_card_con_amt", "信用卡消费金额"),
143
+ ("semi_crdt_card_od_bal", "准贷记卡透支余额"),
144
+ ("semi_crdt_card_con_amt", "准贷记卡消费金额"),
145
+ ("inter_card_con_amt", "国际卡消费金额"),
146
+ ("inter_card_od_bal", "国际卡透支余额"),
147
+ ("crdt_card_dpsit_bal", "信用卡存款余额"),
148
+ ("semi_crdt_card_dpsit_bal", "准贷记卡存款余额"),
149
+ ("inter_card_dpsit_bal", "国际卡存款余额"),
150
+ ("silver_bal", "白银余额"),
151
+ ("agent_solid_silver_bal", "代发实物白银余额"),
152
+ ("pt_bal", "个人养老金余额"),
153
+ ("pd_bal", "个人养老金存款余额"),
154
+ ("other_metal_bal", "其他金属余额"),
155
+ ("curr_dpsit_bal", "活期存款余额"),
156
+ ("time_dpsit_bal", "定期存款余额"),
157
+ ("oil_bal", "石油余额"),
158
+ ("fx_bal", "外汇余额"),
159
+ ]
160
+
161
+ # 严格按照原始循环逻辑注册字段
162
+ for field_name, desc in asset_fields:
163
+ schema.add_field(field_name, "decimal", comment=desc, aggregatable=True)
199
164
 
200
165
  schema.set_monthly_unique(True) # C表每人每月唯一
201
166
  return schema
202
167
 
203
168
 
204
169
  class AUMMonthlyStatSchema:
205
- """AUM月度统计表 - D表结构定义"""
170
+ """AUM月度统计表 - D表结构定义(严格按照已提供给行方的字段)"""
206
171
 
207
172
  @staticmethod
208
173
  def create() -> TableSchema:
209
- """创建月度统计表结构"""
174
+ """创建月度统计表结构 - 严格按照原始定义"""
210
175
  schema = TableSchema('bi_hlwj_realy_month_stat_wy')
211
- schema.add_primary_key('party_dt', 'string') # 注意这个表的主键是party_dt
176
+ schema.add_primary_key('party_id', 'string') # 修正主键名称
212
177
  schema.add_date_field('data_dt', 'string')
213
178
 
214
- # 渠道存取款字段
179
+ # channels字典 - 严格按照原始定义
215
180
  channels = {
216
181
  "CASH_DEPIST": "现金",
217
- "REMIT": "汇款",
182
+ "REMIT": "汇款",
218
183
  "YY": "邮政储蓄",
219
184
  "UNIONPAY": "银联",
220
185
  "FIN_ASSET": "理财产品",
221
- "CORP_ACCT": "对公账户"
186
+ "CORP_ACCT": "对公账户",
222
187
  }
223
-
188
+
189
+ # 注册存入和取出字段 - 严格按照原始逻辑
224
190
  for prefix, desc in channels.items():
225
191
  schema.add_field(f"{prefix}_IN", "decimal", comment=f"{desc}存入金额", aggregatable=True)
226
192
  schema.add_field(f"{prefix}_OUT", "decimal", comment=f"{desc}取出金额", aggregatable=True)
227
-
228
- # 其他存取款字段
193
+
194
+ # 其他特定字段 - 严格按照原始定义
229
195
  schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
230
196
  schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
231
197
  schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
232
198
  schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)
233
-
234
- # 交易渠道指标字段
199
+
200
+ # 定义字段结构:交易渠道、指标、时间范围、描述前缀 - 严格按照原始定义
235
201
  fields = [
236
202
  ("DEBIT_CARD", "借记卡", "MON3"),
237
203
  ("CREDIT_CARD", "信用卡", "MON3"),
238
204
  ("THIRD_PAYMENT", "第三方支付", "MON3"),
239
205
  ("MOBBANK", "手机银行", "MON12"),
240
- ("TELBANK", "电话银行", "MON12")
206
+ ("TELBANK", "电话银行", "MON12"),
241
207
  ]
242
-
243
- metrics = [("TX_CNT", "交易次数"), ("TX_AMT", "交易金额")]
244
-
208
+
209
+ # 定义交易指标 - 严格按照原始定义
210
+ metrics = [
211
+ ("TX_CNT", "交易次数"),
212
+ ("TX_AMT", "交易金额"),
213
+ ]
214
+
215
+ # 自动注册 - 严格按照原始逻辑
245
216
  for channel, desc, period in fields:
246
217
  for metric_code, metric_desc in metrics:
247
218
  field_name = f"{channel}_{metric_code}_{period}"
248
219
  description = f"{desc}{metric_desc}(近{period[-2:]}个月)"
249
220
  schema.add_field(field_name, "decimal", comment=description, aggregatable=True)
250
-
251
- # 其他交易字段
252
- schema.add_field("COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True)
253
- schema.add_field("WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True)
254
-
255
- # 境外交易字段
221
+
222
+ # 其他固定字段 - 严格按照原始定义
223
+ schema.add_field(
224
+ "COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True
225
+ )
226
+ schema.add_field(
227
+ "WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True
228
+ )
229
+
230
+ # 编号国家(1~5) - 严格按照原始循环逻辑
256
231
  for i in range(1, 6):
257
- schema.add_field(f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True)
258
- schema.add_field(f"Y1_OVERS_CNT_CTY{i}_CD", "string", comment=f"近一年境外国家{i}的交易次数(编码)")
259
- schema.add_field(f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True)
260
- schema.add_field(f"Y1_OVERS_AMT_CTY{i}_CD", "string", comment=f"近一年境外国家{i}的交易金额(编码)")
261
-
262
- schema.add_field("Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True)
263
- schema.add_field("Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True)
232
+ schema.add_field(
233
+ f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True
234
+ )
235
+ schema.add_field(
236
+ f"Y1_OVERS_CNT_CTY{i}_CD",
237
+ "string",
238
+ comment=f"近一年境外国家{i}的交易次数(编码)",
239
+ )
240
+ schema.add_field(
241
+ f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True
242
+ )
243
+ schema.add_field(
244
+ f"Y1_OVERS_AMT_CTY{i}_CD",
245
+ "string",
246
+ comment=f"近一年境外国家{i}的交易金额(编码)",
247
+ )
248
+
249
+ # 其他国家 - 严格按照原始定义
250
+ schema.add_field(
251
+ "Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True
252
+ )
253
+ schema.add_field(
254
+ "Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True
255
+ )
264
256
 
265
257
  schema.set_monthly_unique(True) # D表每人每月唯一
266
258
  return schema
@@ -101,8 +101,10 @@ class SchemaDocumentGenerator:
101
101
  content = []
102
102
 
103
103
  # 文档头部
104
- content.append(f"# {business_domain} - {table_type.upper()}表结构文档\\n")
105
- content.append(f"## 基本信息\\n")
104
+ content.append(f"# {business_domain} - {table_type.upper()}表结构文档")
105
+ content.append("")
106
+ content.append(f"## 基本信息")
107
+ content.append("")
106
108
  content.append(f"- **表名**: `{schema.table_name}`")
107
109
  content.append(f"- **业务域**: {business_domain}")
108
110
  content.append(f"- **表类型**: {table_type}")
@@ -111,30 +113,37 @@ class SchemaDocumentGenerator:
111
113
  content.append("")
112
114
 
113
115
  # 表结构说明
114
- content.append("## 表结构说明\\n")
116
+ content.append("## 表结构说明")
117
+ content.append("")
115
118
  if hasattr(schema, 'description'):
116
- content.append(f"{schema.description}\\n")
119
+ content.append(f"{schema.description}")
120
+ content.append("")
117
121
 
118
122
  # 主键信息
119
123
  if hasattr(schema, 'primary_key') and schema.primary_key:
120
- content.append("### 主键字段\\n")
124
+ content.append("### 主键字段")
125
+ content.append("")
121
126
  content.append(f"- `{schema.primary_key}` (主键)")
122
127
  content.append("")
123
128
 
124
129
  # 日期字段
125
130
  if hasattr(schema, 'date_field') and schema.date_field:
126
- content.append("### 日期字段\\n")
131
+ content.append("### 日期字段")
132
+ content.append("")
127
133
  content.append(f"- `{schema.date_field}` (日期字段)")
128
134
  content.append("")
129
135
 
130
136
  # 字段详情表格
131
- content.append("## 字段详情\\n")
137
+ content.append("## 字段详情")
138
+ content.append("")
132
139
  content.append("| 字段名 | 数据类型 | 业务含义 | 可聚合 | 备注 |")
133
140
  content.append("|--------|----------|----------|--------|------|")
134
141
 
135
142
  if hasattr(schema, 'fields'):
136
143
  for field_name, field in schema.fields.items():
137
- field_type = str(field.field_type) if hasattr(field, 'field_type') else 'string'
144
+ # 简化数据类型显示
145
+ field_type_str = str(field.field_type) if hasattr(field, 'field_type') else 'string'
146
+ field_type = field_type_str.replace('FieldType.', '').lower()
138
147
  comment = field.comment if hasattr(field, 'comment') else ''
139
148
  aggregatable = '是' if getattr(field, 'aggregatable', False) else '否'
140
149
  remarks = '' # 可以从其他地方获取备注
@@ -144,9 +153,10 @@ class SchemaDocumentGenerator:
144
153
  content.append("")
145
154
 
146
155
  # 业务规则说明
147
- content.append("## 业务规则\\n")
156
+ content.append("## 业务规则")
157
+ content.append("")
148
158
  content.append("### 数据更新规则")
149
- if getattr(schema, 'monthly_unique', False):
159
+ if getattr(schema, 'is_monthly_unique', False):
150
160
  content.append("- 每人每月一条记录")
151
161
  content.append("- 月末批量更新")
152
162
  else:
@@ -162,7 +172,8 @@ class SchemaDocumentGenerator:
162
172
  content.append("")
163
173
 
164
174
  # 使用说明
165
- content.append("## 使用说明\\n")
175
+ content.append("## 使用说明")
176
+ content.append("")
166
177
  content.append("### 特征工程配置")
167
178
  if table_type == 'behavior':
168
179
  content.append("- 生成原始拷贝特征")
@@ -187,7 +198,7 @@ class SchemaDocumentGenerator:
187
198
  content.append("---")
188
199
  content.append("*本文档由Staran Schema自动生成*")
189
200
 
190
- return "\\n".join(content)
201
+ return "\n".join(content)
191
202
 
192
203
  def _generate_html_content(self, schema, business_domain: str, table_type: str) -> str:
193
204
  """生成HTML格式内容"""
@@ -238,7 +249,9 @@ class SchemaDocumentGenerator:
238
249
  # 添加字段行
239
250
  if hasattr(schema, 'fields'):
240
251
  for field_name, field in schema.fields.items():
241
- field_type = str(field.field_type) if hasattr(field, 'field_type') else 'string'
252
+ # 简化数据类型显示
253
+ field_type_str = str(field.field_type) if hasattr(field, 'field_type') else 'string'
254
+ field_type = field_type_str.replace('FieldType.', '').lower()
242
255
  comment = field.comment if hasattr(field, 'comment') else ''
243
256
  aggregatable = '是' if getattr(field, 'aggregatable', False) else '否'
244
257
  remarks = '' # 可以从其他地方获取备注
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: staran
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: staran - 高性能Python工具库
5
5
  Home-page: https://github.com/starlxa/staran
6
6
  Author: StarAn
@@ -24,7 +24,7 @@ Dynamic: requires-dist
24
24
  Dynamic: requires-python
25
25
  Dynamic: summary
26
26
 
27
- # Star## ✨ v0.4.0 新特性
27
+ # Staran ✨ v0.4.2 新特性
28
28
 
29
29
  - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
30
30
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
@@ -42,7 +42,7 @@ Dynamic: summary
42
42
 
43
43
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
44
44
 
45
- ## ✨ v0.4.0 新特性
45
+ ## ✨ v0.4.2 新特性
46
46
 
47
47
  - 🏗️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
48
48
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -361,7 +361,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
361
361
 
362
362
  ```
363
363
  staran/
364
- ├── __init__.py # 主包入口,v0.4.0功能导出
364
+ ├── __init__.py # 主包入口,v0.4.2功能导出
365
365
  ├── schemas/ # 🆕 表结构定义与文档生成模块
366
366
  │ ├── __init__.py # Schema模块入口
367
367
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -385,7 +385,7 @@ staran/
385
385
  │ ├── __init__.py # 工具模块
386
386
  │ └── date.py # Date类实现
387
387
  ├── setup.py # 安装配置
388
- ├── README.md # 本文档 v0.4.0
388
+ ├── README.md # 本文档 v0.4.2
389
389
  └── quick-upload.sh # 快速部署脚本
390
390
  ```
391
391
 
@@ -561,4 +561,4 @@ MIT License
561
561
 
562
562
  ---
563
563
 
564
- **Staran v0.4.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
564
+ **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -13,12 +13,12 @@ staran/features/generator.py,sha256=CI1F_PshOvokQJelsqSaVp-SNQpMc-WVmjMQKzgdeLw,
13
13
  staran/features/manager.py,sha256=2-3Hc3qthtyzwiuQy5QTz6RfhKK3szoylconzI3moc4,5201
14
14
  staran/features/schema.py,sha256=FwOfpTcxq4K8zkO3MFNqKPQBp_e8qY-N6gazqm9_lAQ,6067
15
15
  staran/schemas/__init__.py,sha256=2RkcWCaIkrOHd37zzRCla0-jNg4cPnc6BGmmW5Vha0Y,652
16
- staran/schemas/document_generator.py,sha256=Lm9rim6yPnT1U_aStaM4KtU9eKxHnDNVfJIusQf5zQY,13120
17
- staran/schemas/aum/__init__.py,sha256=n_DuAH3ncaScS3hPR72Eq6RtcFj0WTp3fbXHVes3WsE,16743
16
+ staran/schemas/document_generator.py,sha256=Mr7TjmKwspqxXnp9DhzZxsRx0l2Bo7MOI8mOxRtgwxU,13600
17
+ staran/schemas/aum/__init__.py,sha256=jVkmJdhHGHdGE4rJ605zsRU2zIQMEHWnlgW2ZQk8AdU,13082
18
18
  staran/tools/__init__.py,sha256=KtudrYnxKD9HZEL4H-mrWlKrmsI3rYjJrLeC9YDTpG4,1054
19
19
  staran/tools/date.py,sha256=-QyEMWVx6czMuOIwcV7kR3gBMRVOwb5qevo7GEFSJKE,10488
20
- staran-0.4.0.dist-info/licenses/LICENSE,sha256=2EmsBIyDCono4iVXNpv5_px9qt2b7hfPq1WuyGVMNP4,1361
21
- staran-0.4.0.dist-info/METADATA,sha256=doZ3FJjMqxkOgqYKz74dwlJk4ICd4I0P38Qv48YSJG4,18809
22
- staran-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
- staran-0.4.0.dist-info/top_level.txt,sha256=NOUZtXSh5oSIEjHrC0lQ9WmoKtD010Q00dghWyag-Zs,7
24
- staran-0.4.0.dist-info/RECORD,,
20
+ staran-0.4.2.dist-info/licenses/LICENSE,sha256=2EmsBIyDCono4iVXNpv5_px9qt2b7hfPq1WuyGVMNP4,1361
21
+ staran-0.4.2.dist-info/METADATA,sha256=u8_IU7kGJqbFbKZLXUIgACqklcrSp56UWcawoWAtGtY,18809
22
+ staran-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ staran-0.4.2.dist-info/top_level.txt,sha256=NOUZtXSh5oSIEjHrC0lQ9WmoKtD010Q00dghWyag-Zs,7
24
+ staran-0.4.2.dist-info/RECORD,,
File without changes