staran 0.2.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,314 @@
1
+ """
2
+ AUM业务表结构定义模块
3
+
4
+ 包含AUM (资产管理)业务相关的所有标准表结构:
5
+ - 客户行为特征表 (AUMBehaviorSchema)
6
+ - 资产平均值表 (AUMAssetAvgSchema)
7
+ - 资产配置表 (AUMAssetConfigSchema)
8
+ - 月度统计表 (AUMMonthlyStatSchema)
9
+
10
+ 这些表结构可以用于:
11
+ 1. 特征工程流水线
12
+ 2. 数据模型构建
13
+ 3. 业务文档生成
14
+ 4. 数据质量检查
15
+ """
16
+
17
+ from typing import Dict
18
+ from ...features.schema import TableSchema
19
+ from ..document_generator import SchemaDocumentGenerator
20
+
21
+
22
class AUMBehaviorSchema:
    """AUM customer behaviour feature table - definition of table A."""

    @staticmethod
    def create() -> TableSchema:
        """Build the schema of the customer behaviour feature table.

        Returns:
            A ``TableSchema`` named ``bi_hlwj_dfcw_f1_f4_wy`` with
            ``party_id`` as primary key, ``data_dt`` as date field and
            every column registered in declaration order.
        """
        table = TableSchema('bi_hlwj_dfcw_f1_f4_wy')
        table.add_primary_key('party_id', 'string')
        table.add_date_field('data_dt', 'string')

        # (column name, data type, comment, aggregatable) in registration
        # order. Columns flagged False are registered without passing the
        # ``aggregatable`` keyword at all.
        columns = (
            # Basic profile
            ("AGE", "int", "客户年龄", True),
            ("GENDER", "string", "客户性别", False),
            ("EDU_LEVEL", "string", "教育水平", False),
            ("MARITAL_STATUS", "string", "婚姻状况", False),
            ("INCOME_LEVEL", "string", "收入水平", False),
            ("OCCUPATION", "string", "职业类型", False),
            ("CITY_LEVEL", "string", "城市等级", False),
            # Account information
            ("ACCT_OPEN_MONTHS", "int", "开户月数", True),
            ("MAIN_ACCT_BAL", "decimal", "主账户余额", True),
            ("ACCT_COUNT", "int", "账户总数", True),
            ("DEPOSIT_ACCT_COUNT", "int", "存款账户数", True),
            ("LOAN_ACCT_COUNT", "int", "贷款账户数", True),
            ("CREDIT_CARD_COUNT", "int", "信用卡数量", True),
            # Transaction behaviour
            ("MON3_TXN_COUNT", "int", "近3月交易次数", True),
            ("MON3_TXN_AMT", "decimal", "近3月交易金额", True),
            ("MON6_TXN_COUNT", "int", "近6月交易次数", True),
            ("MON6_TXN_AMT", "decimal", "近6月交易金额", True),
            ("YEAR1_TXN_COUNT", "int", "近1年交易次数", True),
            ("YEAR1_TXN_AMT", "decimal", "近1年交易金额", True),
            # Channel usage preference
            ("ONLINE_BANK_USAGE", "string", "网银使用频度", False),
            ("MOBILE_BANK_USAGE", "string", "手机银行使用频度", False),
            ("ATM_USAGE", "string", "ATM使用频度", False),
            ("COUNTER_USAGE", "string", "柜台使用频度", False),
            # Product holdings
            ("WEALTH_PROD_COUNT", "int", "理财产品数量", True),
            ("FUND_PROD_COUNT", "int", "基金产品数量", True),
            ("INSURANCE_PROD_COUNT", "int", "保险产品数量", True),
            ("GOLD_PROD_COUNT", "int", "黄金产品数量", True),
            # Risk rating
            ("RISK_LEVEL", "string", "风险等级", False),
            ("RISK_APPETITE", "string", "风险偏好", False),
            ("INVESTMENT_EXPERIENCE", "string", "投资经验", False),
            # Service channel preference
            ("PREFER_CHANNEL", "string", "偏好服务渠道", False),
            ("CONTACT_TIME_PREFER", "string", "联系时间偏好", False),
            # Customer activity
            ("LOGIN_DAYS_MON3", "int", "近3月登录天数", True),
            ("LOGIN_DAYS_MON6", "int", "近6月登录天数", True),
            ("LAST_LOGIN_DAYS", "int", "最后登录距今天数", True),
            ("ACTIVE_LEVEL", "string", "活跃度等级", False),
            # Customer value
            ("CUSTOMER_VALUE_SCORE", "decimal", "客户价值评分", True),
            ("POTENTIAL_VALUE_SCORE", "decimal", "潜在价值评分", True),
            ("RETENTION_SCORE", "decimal", "留存倾向评分", True),
            # Marketing response history
            ("CAMPAIGN_RESPONSE_RATE", "decimal", "营销响应率", True),
            ("LAST_CAMPAIGN_RESPONSE", "string", "最近营销响应", False),
            ("PRODUCT_CROSS_SELL_COUNT", "int", "交叉销售产品数", True),
            # Complaints and satisfaction
            ("COMPLAINT_COUNT_YEAR1", "int", "近1年投诉次数", True),
            ("SATISFACTION_SCORE", "decimal", "满意度评分", True),
            ("NPS_SCORE", "decimal", "净推荐值", True),
            # Geography
            ("HOME_BRANCH_CODE", "string", "归属网点代码", False),
            ("FREQ_BRANCH_CODE", "string", "常用网点代码", False),
            ("CROSS_REGION_TXN", "string", "跨地区交易情况", False),
        )

        for column_name, data_type, meaning, can_aggregate in columns:
            if can_aggregate:
                table.add_field(column_name, data_type, comment=meaning, aggregatable=True)
            else:
                table.add_field(column_name, data_type, comment=meaning)

        # Table A holds one record per customer per day, so it is not
        # monthly-unique.
        table.set_monthly_unique(False)
        return table
106
+
107
+
108
class AUMAssetAvgSchema:
    """AUM asset average table - definition of table B."""

    @staticmethod
    def create() -> TableSchema:
        """Build the schema of the asset average table.

        Returns:
            A ``TableSchema`` named ``bi_hlwj_zi_chan_avg_wy`` with
            ``party_id`` as primary key, ``data_dt`` as date field and
            all average-balance columns registered as aggregatable
            decimals.
        """
        table = TableSchema('bi_hlwj_zi_chan_avg_wy')
        table.add_primary_key('party_id', 'string')
        table.add_date_field('data_dt', 'string')

        # (column name, comment) in registration order; every column is an
        # aggregatable decimal.
        average_columns = (
            # Average balance per asset class
            ("TOTAL_ASSET_AVG", "总资产平均值"),
            ("DEPOSIT_AVG", "存款平均余额"),
            ("CURRENT_DEPOSIT_AVG", "活期存款平均余额"),
            ("TIME_DEPOSIT_AVG", "定期存款平均余额"),
            ("WEALTH_PRODUCT_AVG", "理财产品平均余额"),
            ("FUND_ASSET_AVG", "基金资产平均值"),
            ("INSURANCE_ASSET_AVG", "保险资产平均值"),
            ("BOND_ASSET_AVG", "债券资产平均值"),
            ("STOCK_ASSET_AVG", "股票资产平均值"),
            ("GOLD_ASSET_AVG", "黄金资产平均值"),
            ("FOREX_ASSET_AVG", "外汇资产平均值"),
            # Liability averages
            ("TOTAL_DEBT_AVG", "总负债平均值"),
            ("MORTGAGE_DEBT_AVG", "房贷平均余额"),
            ("CREDIT_CARD_DEBT_AVG", "信用卡负债平均值"),
            ("OTHER_LOAN_AVG", "其他贷款平均余额"),
        )

        for column_name, meaning in average_columns:
            table.add_field(column_name, "decimal", comment=meaning, aggregatable=True)

        # Table B is unique per customer per month.
        table.set_monthly_unique(True)
        return table
139
+
140
+
141
class AUMAssetConfigSchema:
    """AUM asset allocation table - definition of table C."""

    @staticmethod
    def create() -> TableSchema:
        """Build the schema of the asset allocation table.

        Returns:
            A ``TableSchema`` named ``bi_hlwj_zi_chang_month_total_zb``
            with ``party_id`` as primary key, ``data_dt`` as date field
            and all allocation columns registered as aggregatable.
        """
        table = TableSchema('bi_hlwj_zi_chang_month_total_zb')
        table.add_primary_key('party_id', 'string')
        table.add_date_field('data_dt', 'string')

        # (column name, data type, comment) in registration order; every
        # column is aggregatable.
        allocation_columns = (
            # Allocation ratio per asset class
            ("DEPOSIT_RATIO", "decimal", "存款资产占比"),
            ("WEALTH_RATIO", "decimal", "理财产品占比"),
            ("FUND_RATIO", "decimal", "基金资产占比"),
            ("INSURANCE_RATIO", "decimal", "保险资产占比"),
            ("BOND_RATIO", "decimal", "债券资产占比"),
            ("STOCK_RATIO", "decimal", "股票资产占比"),
            ("GOLD_RATIO", "decimal", "黄金资产占比"),
            ("FOREX_RATIO", "decimal", "外汇资产占比"),
            # Risky vs. safe assets
            ("HIGH_RISK_RATIO", "decimal", "高风险资产占比"),
            ("MEDIUM_RISK_RATIO", "decimal", "中风险资产占比"),
            ("LOW_RISK_RATIO", "decimal", "低风险资产占比"),
            ("SAFE_ASSET_RATIO", "decimal", "安全资产占比"),
            # Liquidity
            ("HIGH_LIQUIDITY_RATIO", "decimal", "高流动性资产占比"),
            ("MEDIUM_LIQUIDITY_RATIO", "decimal", "中流动性资产占比"),
            ("LOW_LIQUIDITY_RATIO", "decimal", "低流动性资产占比"),
            # Term structure
            ("SHORT_TERM_RATIO", "decimal", "短期资产占比"),
            ("MEDIUM_TERM_RATIO", "decimal", "中期资产占比"),
            ("LONG_TERM_RATIO", "decimal", "长期资产占比"),
            # Currency
            ("RMB_ASSET_RATIO", "decimal", "人民币资产占比"),
            ("USD_ASSET_RATIO", "decimal", "美元资产占比"),
            ("EUR_ASSET_RATIO", "decimal", "欧元资产占比"),
            ("OTHER_CURRENCY_RATIO", "decimal", "其他货币资产占比"),
            # Concentration
            ("ASSET_CONCENTRATION_INDEX", "decimal", "资产集中度指数"),
            ("DIVERSIFICATION_SCORE", "decimal", "分散化程度评分"),
            # Allocation dynamics
            ("CONFIG_CHANGE_FREQ", "int", "配置调整频率"),
            ("LAST_REBALANCE_DAYS", "int", "最后再平衡距今天数"),
            # Allocation performance
            ("CONFIG_RETURN_RATE", "decimal", "配置收益率"),
            ("RISK_ADJUSTED_RETURN", "decimal", "风险调整收益"),
            ("SHARPE_RATIO", "decimal", "夏普比率"),
            # Allocation advice
            ("OPTIMAL_CONFIG_SCORE", "decimal", "最优配置评分"),
            ("CONFIG_IMPROVEMENT_POTENTIAL", "decimal", "配置优化潜力"),
        )

        for column_name, data_type, meaning in allocation_columns:
            table.add_field(column_name, data_type, comment=meaning, aggregatable=True)

        # Table C is unique per customer per month.
        table.set_monthly_unique(True)
        return table
202
+
203
+
204
class AUMMonthlyStatSchema:
    """AUM monthly statistics table - definition of table D."""

    @staticmethod
    def _period_months(period: str) -> str:
        """Return the month count encoded in a period code.

        ``"MON3"`` -> ``"3"`` and ``"MON12"`` -> ``"12"``. A code without
        the ``MON`` prefix is returned unchanged.
        """
        prefix = "MON"
        return period[len(prefix):] if period.startswith(prefix) else period

    @staticmethod
    def create() -> TableSchema:
        """Build the schema of the monthly statistics table.

        Returns:
            A ``TableSchema`` named ``bi_hlwj_realy_month_stat_wy`` with
            ``party_dt`` as primary key and ``data_dt`` as date field.
        """
        schema = TableSchema('bi_hlwj_realy_month_stat_wy')
        # Unlike the other AUM tables, this one is keyed on party_dt.
        schema.add_primary_key('party_dt', 'string')
        schema.add_date_field('data_dt', 'string')

        # Deposit / withdrawal amounts per channel: each prefix yields an
        # ``_IN`` and an ``_OUT`` column.
        # NOTE(review): "CASH_DEPIST" looks like a misspelling of
        # "CASH_DEPOSIT"; it is kept because it presumably mirrors the
        # physical column name - confirm against the source table.
        channels = {
            "CASH_DEPIST": "现金",
            "REMIT": "汇款",
            "YY": "邮政储蓄",
            "UNIONPAY": "银联",
            "FIN_ASSET": "理财产品",
            "CORP_ACCT": "对公账户",
        }

        for prefix, desc in channels.items():
            schema.add_field(f"{prefix}_IN", "decimal", comment=f"{desc}存入金额", aggregatable=True)
            schema.add_field(f"{prefix}_OUT", "decimal", comment=f"{desc}取出金额", aggregatable=True)

        # Other deposit / withdrawal columns
        schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
        schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
        schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
        schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)

        # Transaction metrics per channel: (column prefix, label, period code)
        fields = [
            ("DEBIT_CARD", "借记卡", "MON3"),
            ("CREDIT_CARD", "信用卡", "MON3"),
            ("THIRD_PAYMENT", "第三方支付", "MON3"),
            ("MOBBANK", "手机银行", "MON12"),
            ("TELBANK", "电话银行", "MON12"),
        ]

        metrics = [("TX_CNT", "交易次数"), ("TX_AMT", "交易金额")]

        for channel, desc, period in fields:
            # Strip the "MON" prefix rather than slicing the last two
            # characters: "MON3"[-2:] is "N3", which produced comments
            # reading "近N3个月".
            months = AUMMonthlyStatSchema._period_months(period)
            for metric_code, metric_desc in metrics:
                field_name = f"{channel}_{metric_code}_{period}"
                description = f"{desc}{metric_desc}(近{months}个月)"
                schema.add_field(field_name, "decimal", comment=description, aggregatable=True)

        # Other transaction columns
        schema.add_field("COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True)
        schema.add_field("WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True)

        # Overseas transactions: four columns for each of countries 1..5
        for i in range(1, 6):
            schema.add_field(f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True)
            schema.add_field(f"Y1_OVERS_CNT_CTY{i}_CD", "string", comment=f"近一年境外国家{i}的交易次数(编码)")
            schema.add_field(f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True)
            schema.add_field(f"Y1_OVERS_AMT_CTY{i}_CD", "string", comment=f"近一年境外国家{i}的交易金额(编码)")

        schema.add_field("Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True)
        schema.add_field("Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True)

        # Table D is unique per customer per month.
        schema.set_monthly_unique(True)
        return schema
267
+
268
+
269
def get_aum_schemas() -> Dict[str, TableSchema]:
    """Return every AUM table schema keyed by its table type.

    Returns:
        A dict with the keys ``behavior``, ``asset_avg``, ``asset_config``
        and ``monthly_stat``, each mapped to a freshly created schema.
    """
    schema_classes = (
        ('behavior', AUMBehaviorSchema),
        ('asset_avg', AUMAssetAvgSchema),
        ('asset_config', AUMAssetConfigSchema),
        ('monthly_stat', AUMMonthlyStatSchema),
    )
    return {table_type: schema_cls.create() for table_type, schema_cls in schema_classes}
277
+
278
+
279
def export_aum_docs(output_dir: str = "./docs", format_type: str = "markdown") -> Dict[str, str]:
    """Export one schema document per AUM table.

    Args:
        output_dir: Directory the documents are written to.
        format_type: Document format, passed through unchanged to
            ``SchemaDocumentGenerator.export_schema_doc``.

    Returns:
        A dict mapping each table type to the path returned by the
        generator for that table.
    """
    aum_tables = get_aum_schemas()
    doc_writer = SchemaDocumentGenerator()

    return {
        table_type: doc_writer.export_schema_doc(
            schema=table_schema,
            business_domain="AUM",
            table_type=table_type,
            output_dir=output_dir,
            format_type=format_type,
        )
        for table_type, table_schema in aum_tables.items()
    }
305
+
306
+
307
# Public API of the AUM schema module.
__all__ = [
    "AUMBehaviorSchema",
    "AUMAssetAvgSchema",
    "AUMAssetConfigSchema",
    "AUMMonthlyStatSchema",
    "get_aum_schemas",
    "export_aum_docs",
]
@@ -0,0 +1,337 @@
1
+ """
2
+ Schema文档生成器
3
+
4
+ 支持根据表结构定义生成多种格式的技术文档:
5
+ - Markdown格式:适合开发团队和版本控制
6
+ - PDF格式:适合正式交付和业务方审阅
7
+ - HTML格式:适合在线查看和分享
8
+
9
+ 主要功能:
10
+ 1. 表结构自动解析
11
+ 2. 字段信息格式化
12
+ 3. 业务含义说明
13
+ 4. 技术规范文档
14
+ 5. 多格式导出支持
15
+ """
16
+
17
+ import os
18
+ from typing import Dict, List, Optional
19
+ from datetime import datetime
20
+
21
+
22
class SchemaDocumentGenerator:
    """Generate table-structure documentation from schema objects.

    A schema is read through duck typing: ``table_name`` is required,
    while ``description``, ``primary_key``, ``date_field``, ``fields``
    (a mapping of field name to field object) and the monthly-uniqueness
    flag are optional. Field objects may expose ``field_type``,
    ``comment`` and ``aggregatable``.
    """

    # Attribute names under which a schema may expose its "one row per
    # customer per month" flag. The header section and the business-rules
    # section used to read different names and could contradict each other.
    # NOTE(review): confirm which name TableSchema actually defines.
    _MONTHLY_UNIQUE_ATTRS = ('is_monthly_unique', 'monthly_unique')

    def __init__(self):
        # Per-format settings. 'pdf' has no templates of its own because it
        # is produced by converting the Markdown output.
        self.template_configs = {
            'markdown': {
                'extension': '.md',
                'header_template': self._get_markdown_header_template(),
                'field_template': self._get_markdown_field_template(),
                'footer_template': self._get_markdown_footer_template()
            },
            'pdf': {
                'extension': '.pdf',
                'requires_conversion': True,
                'base_format': 'markdown'  # generate Markdown first, then convert
            },
            'html': {
                'extension': '.html',
                'header_template': self._get_html_header_template(),
                'field_template': self._get_html_field_template(),
                'footer_template': self._get_html_footer_template()
            }
        }

    def export_schema_doc(self, schema, business_domain: str, table_type: str,
                          output_dir: str = "./docs", format_type: str = "markdown") -> str:
        """Write the documentation of one table to ``output_dir``.

        Args:
            schema: Schema object to document.
            business_domain: Business domain name (e.g. AUM, CRM, RISK).
            table_type: Table type (e.g. behavior, asset_avg).
            output_dir: Output directory; created if it does not exist.
            format_type: ``'markdown'``, ``'pdf'`` or ``'html'``
                (case-insensitive). Any other value falls back to Markdown.

        Returns:
            Path of the generated document. For ``'pdf'``, the Markdown
            path is returned instead when the PDF conversion fails, so the
            returned path always refers to a file that was written.
        """
        os.makedirs(output_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d")
        filename = f"{business_domain}_{table_type}_schema_{timestamp}"
        fmt = format_type.lower()

        if fmt == 'html':
            html_path = os.path.join(output_dir, f"{filename}.html")
            self._write_text(
                html_path,
                self._generate_html_content(schema, business_domain, table_type),
            )
            return html_path

        # Markdown is both the default output and the source for PDF.
        md_path = os.path.join(output_dir, f"{filename}.md")
        self._write_text(
            md_path,
            self._generate_markdown_content(schema, business_domain, table_type),
        )
        if fmt != 'pdf':
            return md_path

        pdf_path = os.path.join(output_dir, f"{filename}.pdf")
        if self._convert_md_to_pdf(md_path, pdf_path):
            return pdf_path
        # Conversion failed: hand back the document that does exist.
        return md_path

    @staticmethod
    def _write_text(path: str, content: str) -> None:
        """Write ``content`` to ``path`` as UTF-8 text."""
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)

    @classmethod
    def _is_monthly_unique(cls, schema) -> bool:
        """Return True if any known monthly-uniqueness attribute is truthy."""
        return any(bool(getattr(schema, attr, False)) for attr in cls._MONTHLY_UNIQUE_ATTRS)

    @staticmethod
    def _iter_field_rows(schema):
        """Yield (name, type, comment, aggregatable, remarks) per field."""
        fields = getattr(schema, 'fields', None)
        if not fields:
            return
        for field_name, field in fields.items():
            field_type = str(getattr(field, 'field_type', 'string'))
            # A missing or None comment renders as an empty cell, not "None".
            comment = getattr(field, 'comment', '') or ''
            aggregatable = '是' if getattr(field, 'aggregatable', False) else '否'
            remarks = ''  # placeholder: no source for remarks yet
            yield field_name, field_type, comment, aggregatable, remarks

    @staticmethod
    def _md_cell(value) -> str:
        """Make ``value`` safe for a single Markdown table cell."""
        # Newlines and unescaped pipes would break the table row.
        return ' '.join(str(value).splitlines()).replace('|', '\\|')

    def _generate_markdown_content(self, schema, business_domain: str, table_type: str) -> str:
        """Return the Markdown document for ``schema`` as one string."""
        monthly_unique = self._is_monthly_unique(schema)
        content = []

        # Document header
        content.append(f"# {business_domain} - {table_type.upper()}表结构文档")
        content.append("")
        content.append("## 基本信息")
        content.append("")
        content.append(f"- **表名**: `{schema.table_name}`")
        content.append(f"- **业务域**: {business_domain}")
        content.append(f"- **表类型**: {table_type}")
        content.append(f"- **生成时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        content.append(f"- **月度唯一性**: {'是' if monthly_unique else '否'}")
        content.append("")

        # Table description
        content.append("## 表结构说明")
        content.append("")
        description = getattr(schema, 'description', None)
        if description:
            content.append(str(description))
            content.append("")

        # Primary key
        primary_key = getattr(schema, 'primary_key', None)
        if primary_key:
            content.append("### 主键字段")
            content.append("")
            content.append(f"- `{primary_key}` (主键)")
            content.append("")

        # Date field
        date_field = getattr(schema, 'date_field', None)
        if date_field:
            content.append("### 日期字段")
            content.append("")
            content.append(f"- `{date_field}` (日期字段)")
            content.append("")

        # Field detail table
        content.append("## 字段详情")
        content.append("")
        content.append("| 字段名 | 数据类型 | 业务含义 | 可聚合 | 备注 |")
        content.append("|--------|----------|----------|--------|------|")
        for field_name, field_type, comment, aggregatable, remarks in self._iter_field_rows(schema):
            content.append(
                f"| `{field_name}` | {self._md_cell(field_type)} | {self._md_cell(comment)} "
                f"| {aggregatable} | {remarks} |"
            )
        content.append("")

        # Business rules
        content.append("## 业务规则")
        content.append("")
        content.append("### 数据更新规则")
        if monthly_unique:
            content.append("- 每人每月一条记录")
            content.append("- 月末批量更新")
        else:
            content.append("- 每人每日一条记录")
            content.append("- 日终批量更新")
        content.append("")

        content.append("### 数据质量要求")
        content.append("- 主键字段不允许为空")
        content.append("- 日期字段格式统一为YYYYMMDD")
        content.append("- 金额字段精度保持2位小数")
        content.append("- 比例字段取值范围[0,1]")
        content.append("")

        # Usage notes
        content.append("## 使用说明")
        content.append("")
        content.append("### 特征工程配置")
        if table_type == 'behavior':
            content.append("- 生成原始拷贝特征")
            content.append("- 生成聚合特征")
            content.append("- 不生成环比、同比特征")
        else:
            content.append("- 生成聚合特征")
            content.append("- 生成5个月环比特征")
            content.append("- 生成1年同比特征")
        content.append("")

        content.append("### 示例SQL查询")
        content.append("```sql")
        content.append("-- 查询最新数据")
        content.append(f"SELECT * FROM {schema.table_name}")
        content.append(f"WHERE data_dt = (SELECT MAX(data_dt) FROM {schema.table_name})")
        content.append("LIMIT 10;")
        content.append("```")
        content.append("")

        # Document footer
        content.append("---")
        content.append("*本文档由Staran Schema自动生成*")

        # Join with real newlines; the previous "\\n" emitted a literal
        # backslash-n and collapsed the document onto one line.
        return "\n".join(content)

    def _generate_html_content(self, schema, business_domain: str, table_type: str) -> str:
        """Return a standalone HTML document for ``schema``."""
        import html

        esc = html.escape
        title = f"{esc(str(business_domain))} - {esc(str(table_type).upper())}表结构文档"
        monthly_unique = '是' if self._is_monthly_unique(schema) else '否'
        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        parts = [
            '<!DOCTYPE html>',
            '<html lang="zh-CN">',
            '<head>',
            '    <meta charset="UTF-8">',
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">',
            f'    <title>{title}</title>',
            '    <style>',
            "        body { font-family: 'Microsoft YaHei', Arial, sans-serif; margin: 40px; }",
            '        h1, h2, h3 { color: #333; }',
            '        table { border-collapse: collapse; width: 100%; margin: 20px 0; }',
            '        th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }',
            '        th { background-color: #f2f2f2; font-weight: bold; }',
            '        code { background-color: #f4f4f4; padding: 2px 4px; border-radius: 3px; }',
            '        .info-table { background-color: #f9f9f9; }',
            '    </style>',
            '</head>',
            '<body>',
            f'    <h1>{title}</h1>',
            '',
            '    <h2>基本信息</h2>',
            '    <table class="info-table">',
            f'        <tr><th>表名</th><td><code>{esc(str(schema.table_name))}</code></td></tr>',
            f'        <tr><th>业务域</th><td>{esc(str(business_domain))}</td></tr>',
            f'        <tr><th>表类型</th><td>{esc(str(table_type))}</td></tr>',
            f'        <tr><th>生成时间</th><td>{generated_at}</td></tr>',
            f'        <tr><th>月度唯一性</th><td>{monthly_unique}</td></tr>',
            '    </table>',
            '',
            '    <h2>字段详情</h2>',
            '    <table>',
            '        <thead>',
            '            <tr>',
            '                <th>字段名</th>',
            '                <th>数据类型</th>',
            '                <th>业务含义</th>',
            '                <th>可聚合</th>',
            '                <th>备注</th>',
            '            </tr>',
            '        </thead>',
            '        <tbody>',
        ]

        # One row per field; schema-provided text is escaped so characters
        # such as "<" or "&" in a comment cannot break the markup.
        for field_name, field_type, comment, aggregatable, remarks in self._iter_field_rows(schema):
            parts.extend([
                '            <tr>',
                f'                <td><code>{esc(str(field_name))}</code></td>',
                f'                <td>{esc(field_type)}</td>',
                f'                <td>{esc(str(comment))}</td>',
                f'                <td>{aggregatable}</td>',
                f'                <td>{esc(remarks)}</td>',
                '            </tr>',
            ])

        parts.extend([
            '        </tbody>',
            '    </table>',
            '',
            '    <hr>',
            '    <p><em>本文档由Staran Schema自动生成</em></p>',
            '</body>',
            '</html>',
        ])

        return "\n".join(parts)

    def _convert_md_to_pdf(self, md_path: str, pdf_path: str) -> bool:
        """Convert a Markdown file to PDF with pandoc.

        If pandoc is missing or exits with an error, a
        ``*_conversion_note.txt`` file explaining how to obtain a PDF is
        written next to the intended PDF path instead.

        Returns:
            True if pandoc completed successfully, False otherwise.
        """
        # Imported before the ``try`` so the ``except`` clause can always
        # resolve ``subprocess.CalledProcessError``.
        import subprocess

        try:
            subprocess.run([
                'pandoc', md_path, '-o', pdf_path,
                '--pdf-engine=xelatex',
                '--variable=CJKmainfont:Microsoft YaHei'
            ], check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            # splitext only touches the real extension; str.replace would
            # also rewrite ".pdf" occurring in a directory name.
            note_path = os.path.splitext(pdf_path)[0] + '_conversion_note.txt'
            with open(note_path, 'w', encoding='utf-8') as f:
                f.write("PDF转换说明:\n")
                f.write(f"原始Markdown文件:{md_path}\n")
                f.write("如需PDF格式,请安装pandoc工具:\n")
                f.write("pip install pandoc\n")
                f.write("或访问:https://pandoc.org/installing.html\n")
            return False
        return True

    def _get_markdown_header_template(self) -> str:
        """Return the Markdown header template (placeholder: ``title``)."""
        return "# {title}\n\n## 基本信息\n\n"

    def _get_markdown_field_template(self) -> str:
        """Return the Markdown table-row template for one field."""
        return "| {name} | {type} | {comment} | {aggregatable} |\n"

    def _get_markdown_footer_template(self) -> str:
        """Return the Markdown footer template (placeholder: ``timestamp``)."""
        return "\n---\n*文档生成时间: {timestamp}*\n"

    def _get_html_header_template(self) -> str:
        """Return the HTML header template (placeholder: ``title``)."""
        return "<h1>{title}</h1>\n<h2>基本信息</h2>\n"

    def _get_html_field_template(self) -> str:
        """Return the HTML table-row template for one field."""
        return "<tr><td>{name}</td><td>{type}</td><td>{comment}</td><td>{aggregatable}</td></tr>\n"

    def _get_html_footer_template(self) -> str:
        """Return the HTML footer template (placeholder: ``timestamp``)."""
        return "<hr><p><em>文档生成时间: {timestamp}</em></p>\n"
302
+
303
+
304
def export_business_docs(business_domain: str, schemas_dict: Dict, output_dir: str = "./docs",
                         format_type: str = "markdown") -> Dict[str, str]:
    """Export one schema document per table of a business domain.

    Args:
        business_domain: Business domain name.
        schemas_dict: Mapping of table type to schema object.
        output_dir: Directory the documents are written to.
        format_type: Document format, passed through unchanged to
            ``SchemaDocumentGenerator.export_schema_doc``.

    Returns:
        A dict mapping each table type to the path returned by the
        generator for that table.
    """
    doc_writer = SchemaDocumentGenerator()

    return {
        table_type: doc_writer.export_schema_doc(
            schema=table_schema,
            business_domain=business_domain,
            table_type=table_type,
            output_dir=output_dir,
            format_type=format_type,
        )
        for table_type, table_schema in schemas_dict.items()
    }
332
+
333
+
334
# Public API of the schema document generator module.
__all__ = [
    "SchemaDocumentGenerator",
    "export_business_docs",
]