staran 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,26 +1,29 @@
1
1
  """
2
- AUM业务表结构定义模块
2
+ 新疆工行代发长尾客户表结构定义模块
3
3
 
4
- 包含AUM (资产管理)业务相关的所有标准表结构:
5
- - 客户行为特征表 (AUMBehaviorSchema)
6
- - 资产平均值表 (AUMAssetAvgSchema)
7
- - 资产配置表 (AUMAssetConfigSchema)
8
- - 月度统计表 (AUMMonthlyStatSchema)
4
+ 包含新疆工行代发长尾客户相关的所有表结构:
5
+ - 代发长尾客户行为特征表 (XinjiangICBCDaifaLongtailBehaviorSchema)
6
+ - 代发长尾客户资产平均值表 (XinjiangICBCDaifaLongtailAssetAvgSchema)
7
+ - 代发长尾客户资产配置表 (XinjiangICBCDaifaLongtailAssetConfigSchema)
8
+ - 代发长尾客户月度统计表 (XinjiangICBCDaifaLongtailMonthlyStatSchema)
9
+
10
+ 数据库: xinjiang_icbc_daifa_longtail
11
+ 业务范围: 代发长尾客户(资产10k-100k)
9
12
 
10
13
  这些表结构可以用于:
11
- 1. 特征工程流水线
12
- 2. 数据模型构建
14
+ 1. 代发长尾客户特征工程
15
+ 2. 提升模型和防流失模型构建
13
16
  3. 业务文档生成
14
17
  4. 数据质量检查
15
18
  """
16
19
 
17
20
  from typing import Dict
18
21
  from ...features.schema import TableSchema
19
- from ..document_generator import SchemaDocumentGenerator
22
+ from ...tools.document_generator import SchemaDocumentGenerator
20
23
 
21
24
 
22
- class AUMBehaviorSchema:
23
- """AUM客户行为特征表 - A表结构定义(严格按照已提供给行方的字段)"""
25
+ class XinjiangICBCDaifaLongtailBehaviorSchema:
26
+ """新疆工行代发长尾客户行为特征表 - 严格按照已提供给行方的字段"""
24
27
 
25
28
  # 统计指标定义 - 与原始定义完全一致
26
29
  _STATS = [
@@ -36,8 +39,8 @@ class AUMBehaviorSchema:
36
39
 
37
40
  @staticmethod
38
41
  def create() -> TableSchema:
39
- """创建客户行为特征表结构 - 严格按照已提供给行方的字段定义"""
40
- schema = TableSchema('bi_hlwj_dfcw_f1_f4_wy')
42
+ """创建新疆工行代发长尾客户行为特征表结构"""
43
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_dfcw_f1_f4_wy')
41
44
  schema.add_primary_key('party_id', 'string')
42
45
  schema.add_date_field('data_dt', 'string')
43
46
 
@@ -52,7 +55,7 @@ class AUMBehaviorSchema:
52
55
  schema.add_field("orderidcount", "int", comment="订单数", aggregatable=True)
53
56
  schema.add_field("label", "float", comment="标签值(如是否购买)", aggregatable=True)
54
57
 
55
- # productamount and m1 ~ m4 的含义描述 - 严格按照原始定义
58
+ # productamount and m1 ~ m4 的含义描述
56
59
  m_fields = {
57
60
  "productamount": "购买金额",
58
61
  "m1": "去重订单数",
@@ -61,246 +64,147 @@ class AUMBehaviorSchema:
61
64
  "m4": "去重产品品类数",
62
65
  }
63
66
 
64
- # 使用循环注册 productamount and m1~m4 各统计字段 - 严格按照原始逻辑
67
+ # 使用循环注册 productamount and m1~m4 各统计字段
65
68
  for prefix, meaning in m_fields.items():
66
- for stat_key, stat_desc in AUMBehaviorSchema._STATS:
69
+ for stat_key, stat_desc in XinjiangICBCDaifaLongtailBehaviorSchema._STATS:
67
70
  field_name = f"{prefix}_{stat_key}"
68
71
  description = f"{meaning}的{stat_desc}"
69
72
  schema.add_field(field_name, "float", comment=description, aggregatable=True)
70
73
 
71
- # 其他字段 - 严格按照原始定义
74
+ # 其他字段
72
75
  schema.add_field("life_day", "float", comment="客户生命周期天数", aggregatable=True)
73
76
  schema.add_field("gender", "float", comment="性别(编码)", aggregatable=True)
74
77
  schema.add_field("open_day", "float", comment="开户天数", aggregatable=True)
75
78
 
76
- schema.set_monthly_unique(False) # A表每人每日记录
77
- return schema
78
- schema.add_field("FREQ_BRANCH_CODE", "string", comment="常用网点代码")
79
- schema.add_field("CROSS_REGION_TXN", "string", comment="跨地区交易情况")
80
-
81
- schema.set_monthly_unique(False) # A表每人每天一条记录
79
+ schema.set_monthly_unique(False) # 每人每日记录
82
80
  return schema
83
81
 
84
82
 
85
- class AUMAssetAvgSchema:
86
- """AUM资产平均值表 - B表结构定义(严格按照已提供给行方的字段)"""
83
+ class XinjiangICBCDaifaLongtailAssetAvgSchema:
84
+ """新疆工行代发长尾客户资产平均值表"""
87
85
 
88
86
  @staticmethod
89
87
  def create() -> TableSchema:
90
- """创建资产平均值表结构 - 严格按照原始定义"""
91
- schema = TableSchema('bi_hlwj_zi_chan_avg_wy')
88
+ """创建新疆工行代发长尾客户资产平均值表结构"""
89
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_zi_chan_avg_wy')
92
90
  schema.add_primary_key('party_id', 'string')
93
91
  schema.add_date_field('data_dt', 'string')
94
92
 
95
- # 基础余额字段 - 严格按照原始定义
93
+ # 基础余额字段
96
94
  schema.add_field("asset_total_bal", "decimal", comment="总资产余额", aggregatable=True)
97
95
  schema.add_field("liab_total_bal", "decimal", comment="总负债余额", aggregatable=True)
98
- schema.add_field("dpsit_total_bal", "decimal", comment="存款总余额", aggregatable=True)
99
- schema.add_field("loan_total_bal", "decimal", comment="贷款总余额", aggregatable=True)
100
- schema.add_field("card_total_bal", "decimal", comment="信用卡总余额", aggregatable=True)
101
- schema.add_field("mid_busi_total_bal", "decimal", comment="中间业务总余额", aggregatable=True)
102
-
103
- # Register average balance fields - 严格按照原始逻辑
104
- for period in ["month", "year", "3", "6", "12"]:
105
- schema.add_field(
106
- f"avg_asset_bal_{period}", "decimal", comment=f"平均资产余额 ({period}期)", aggregatable=True
107
- )
108
- for period in ["3", "12"]:
109
- schema.add_field(
110
- f"avg_dpsit_bal_{period}", "decimal", comment=f"平均存款余额 ({period}期)", aggregatable=True
111
- )
96
+ schema.add_field("net_asset_bal", "decimal", comment="净资产余额", aggregatable=True)
112
97
 
113
- schema.set_monthly_unique(True) # B表每人每月唯一
98
+ # 存款相关字段
99
+ schema.add_field("dep_bal", "decimal", comment="存款余额", aggregatable=True)
100
+ schema.add_field("current_dep_bal", "decimal", comment="活期存款余额", aggregatable=True)
101
+ schema.add_field("time_dep_bal", "decimal", comment="定期存款余额", aggregatable=True)
102
+
103
+ # 理财投资字段
104
+ schema.add_field("wealth_bal", "decimal", comment="理财余额", aggregatable=True)
105
+ schema.add_field("fund_bal", "decimal", comment="基金余额", aggregatable=True)
106
+ schema.add_field("insurance_bal", "decimal", comment="保险余额", aggregatable=True)
107
+
108
+ schema.set_monthly_unique(True) # 每人每月一条记录
114
109
  return schema
115
110
 
116
111
 
117
- class AUMAssetConfigSchema:
118
- """AUM资产配置表 - C表结构定义(严格按照已提供给行方的字段)"""
112
+ class XinjiangICBCDaifaLongtailAssetConfigSchema:
113
+ """新疆工行代发长尾客户资产配置表"""
119
114
 
120
115
  @staticmethod
121
116
  def create() -> TableSchema:
122
- """创建资产配置表结构 - 严格按照原始定义"""
123
- schema = TableSchema('bi_hlwj_zi_chang_month_total_zb')
117
+ """创建新疆工行代发长尾客户资产配置表结构"""
118
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_zi_chan_config_wy')
124
119
  schema.add_primary_key('party_id', 'string')
125
120
  schema.add_date_field('data_dt', 'string')
126
121
 
127
- # asset_fields - 严格按照原始定义
128
- asset_fields = [
129
- ("seg_asset_total", "总资产余额"),
130
- ("indv_consm_loan_amt", "个人消费贷款余额"),
131
- ("indv_house_loan_amt", "个人住房贷款余额"),
132
- ("indv_oper_loan_amt", "个人经营贷款余额"),
133
- ("dpsit_bal", "存款余额"),
134
- ("tbond_bal", "国债余额"),
135
- ("fund_bal", "基金余额"),
136
- ("bond_bal", "债券余额"),
137
- ("gold_bal", "黄金余额"),
138
- ("wcurr_chrem_bal", "外币现钞余额"),
139
- ("presv_mgmt_secu_bal", "保值管理证券余额"),
140
- ("insure_form_bal", "保险单余额"),
141
- ("crdt_card_od_bal", "信用卡透支余额"),
142
- ("crdt_card_con_amt", "信用卡消费金额"),
143
- ("semi_crdt_card_od_bal", "准贷记卡透支余额"),
144
- ("semi_crdt_card_con_amt", "准贷记卡消费金额"),
145
- ("inter_card_con_amt", "国际卡消费金额"),
146
- ("inter_card_od_bal", "国际卡透支余额"),
147
- ("crdt_card_dpsit_bal", "信用卡存款余额"),
148
- ("semi_crdt_card_dpsit_bal", "准贷记卡存款余额"),
149
- ("inter_card_dpsit_bal", "国际卡存款余额"),
150
- ("silver_bal", "白银余额"),
151
- ("agent_solid_silver_bal", "代发实物白银余额"),
152
- ("pt_bal", "个人养老金余额"),
153
- ("pd_bal", "个人养老金存款余额"),
154
- ("other_metal_bal", "其他金属余额"),
155
- ("curr_dpsit_bal", "活期存款余额"),
156
- ("time_dpsit_bal", "定期存款余额"),
157
- ("oil_bal", "石油余额"),
158
- ("fx_bal", "外汇余额"),
159
- ]
160
-
161
- # 严格按照原始循环逻辑注册字段
162
- for field_name, desc in asset_fields:
163
- schema.add_field(field_name, "decimal", comment=desc, aggregatable=True)
122
+ # 资产配置比例字段
123
+ schema.add_field("cash_ratio", "float", comment="现金类资产占比", aggregatable=True)
124
+ schema.add_field("fixed_income_ratio", "float", comment="固收类资产占比", aggregatable=True)
125
+ schema.add_field("equity_ratio", "float", comment="权益类资产占比", aggregatable=True)
126
+ schema.add_field("alternative_ratio", "float", comment="另类资产占比", aggregatable=True)
164
127
 
165
- schema.set_monthly_unique(True) # C表每人每月唯一
128
+ # 风险偏好相关
129
+ schema.add_field("risk_level", "int", comment="风险偏好等级(1-5)", aggregatable=True)
130
+ schema.add_field("investment_experience", "int", comment="投资经验年限", aggregatable=True)
131
+
132
+ # 配置变化指标
133
+ schema.add_field("config_change_freq", "int", comment="配置调整频率", aggregatable=True)
134
+ schema.add_field("rebalance_count", "int", comment="再平衡次数", aggregatable=True)
135
+
136
+ schema.set_monthly_unique(True)
166
137
  return schema
167
138
 
168
139
 
169
- class AUMMonthlyStatSchema:
170
- """AUM月度统计表 - D表结构定义(严格按照已提供给行方的字段)"""
140
+ class XinjiangICBCDaifaLongtailMonthlyStatSchema:
141
+ """新疆工行代发长尾客户月度统计表"""
171
142
 
172
143
  @staticmethod
173
144
  def create() -> TableSchema:
174
- """创建月度统计表结构 - 严格按照原始定义"""
175
- schema = TableSchema('bi_hlwj_realy_month_stat_wy')
176
- schema.add_primary_key('party_id', 'string') # 修正主键名称
145
+ """创建新疆工行代发长尾客户月度统计表结构"""
146
+ schema = TableSchema('xinjiang_icbc_daifa_hlwj_monthly_stat_wy')
147
+ schema.add_primary_key('party_id', 'string')
177
148
  schema.add_date_field('data_dt', 'string')
178
149
 
179
- # channels字典 - 严格按照原始定义
180
- channels = {
181
- "CASH_DEPIST": "现金",
182
- "REMIT": "汇款",
183
- "YY": "邮政储蓄",
184
- "UNIONPAY": "银联",
185
- "FIN_ASSET": "理财产品",
186
- "CORP_ACCT": "对公账户",
187
- }
188
-
189
- # 注册存入和取出字段 - 严格按照原始逻辑
190
- for prefix, desc in channels.items():
191
- schema.add_field(f"{prefix}_IN", "decimal", comment=f"{desc}存入金额", aggregatable=True)
192
- schema.add_field(f"{prefix}_OUT", "decimal", comment=f"{desc}取出金额", aggregatable=True)
193
-
194
- # 其他特定字段 - 严格按照原始定义
195
- schema.add_field("AGENT_SALARY_IN", "decimal", comment="代发工资存入金额", aggregatable=True)
196
- schema.add_field("CREDIT_CARD_OUT", "decimal", comment="信用卡取出金额", aggregatable=True)
197
- schema.add_field("DEBIT_CARD_OUT", "decimal", comment="借记卡取出金额", aggregatable=True)
198
- schema.add_field("BATCH_DEDUCT_OUT", "decimal", comment="批量扣款金额", aggregatable=True)
199
-
200
- # 定义字段结构:交易渠道、指标、时间范围、描述前缀 - 严格按照原始定义
201
- fields = [
202
- ("DEBIT_CARD", "借记卡", "MON3"),
203
- ("CREDIT_CARD", "信用卡", "MON3"),
204
- ("THIRD_PAYMENT", "第三方支付", "MON3"),
205
- ("MOBBANK", "手机银行", "MON12"),
206
- ("TELBANK", "电话银行", "MON12"),
207
- ]
208
-
209
- # 定义交易指标 - 严格按照原始定义
210
- metrics = [
211
- ("TX_CNT", "交易次数"),
212
- ("TX_AMT", "交易金额"),
213
- ]
214
-
215
- # 自动注册 - 严格按照原始逻辑
216
- for channel, desc, period in fields:
217
- for metric_code, metric_desc in metrics:
218
- field_name = f"{channel}_{metric_code}_{period}"
219
- description = f"{desc}{metric_desc}(近{period[-2:]}个月)"
220
- schema.add_field(field_name, "decimal", comment=description, aggregatable=True)
221
-
222
- # 其他固定字段 - 严格按照原始定义
223
- schema.add_field(
224
- "COUNTER_TX_CNT_MON12", "int", comment="柜台交易次数(近12个月)", aggregatable=True
225
- )
226
- schema.add_field(
227
- "WEBBANK_TX_CNT_MON12", "int", comment="网银交易次数(近12个月)", aggregatable=True
228
- )
229
-
230
- # 编号国家(1~5) - 严格按照原始循环逻辑
231
- for i in range(1, 6):
232
- schema.add_field(
233
- f"Y1_OVERS_CTY{i}_CNT", "int", comment=f"近一年境外国家{i}的交易次数", aggregatable=True
234
- )
235
- schema.add_field(
236
- f"Y1_OVERS_CNT_CTY{i}_CD",
237
- "string",
238
- comment=f"近一年境外国家{i}的交易次数(编码)",
239
- )
240
- schema.add_field(
241
- f"Y1_OVERS_CTY{i}_AMT", "decimal", comment=f"近一年境外国家{i}的交易金额", aggregatable=True
242
- )
243
- schema.add_field(
244
- f"Y1_OVERS_AMT_CTY{i}_CD",
245
- "string",
246
- comment=f"近一年境外国家{i}的交易金额(编码)",
247
- )
248
-
249
- # 其他国家 - 严格按照原始定义
250
- schema.add_field(
251
- "Y1_OVERS_OTHER_CTY_CNT", "int", comment="近一年其他境外国家的交易次数", aggregatable=True
252
- )
253
- schema.add_field(
254
- "Y1_OVERS_OTHER_CTY_AMT", "decimal", comment="近一年其他境外国家的交易金额", aggregatable=True
255
- )
150
+ # 月度交易统计
151
+ schema.add_field("monthly_txn_count", "int", comment="月度交易笔数", aggregatable=True)
152
+ schema.add_field("monthly_txn_amount", "decimal", comment="月度交易金额", aggregatable=True)
153
+ schema.add_field("monthly_deposit_amount", "decimal", comment="月度存入金额", aggregatable=True)
154
+ schema.add_field("monthly_withdraw_amount", "decimal", comment="月度取出金额", aggregatable=True)
155
+
156
+ # 代发工资相关统计
157
+ schema.add_field("salary_amount", "decimal", comment="月度代发工资金额", aggregatable=True)
158
+ schema.add_field("salary_date", "string", comment="代发工资日期")
159
+ schema.add_field("salary_stability", "float", comment="工资稳定性指数", aggregatable=True)
160
+
161
+ # 长尾客户特征
162
+ schema.add_field("longtail_score", "float", comment="长尾客户评分", aggregatable=True)
163
+ schema.add_field("upgrade_potential", "float", comment="提升潜力评分", aggregatable=True)
164
+ schema.add_field("churn_risk", "float", comment="流失风险评分", aggregatable=True)
256
165
 
257
- schema.set_monthly_unique(True) # D表每人每月唯一
166
+ # 活跃度指标
167
+ schema.add_field("login_days", "int", comment="月度登录天数", aggregatable=True)
168
+ schema.add_field("channel_usage", "string", comment="渠道使用情况")
169
+
170
+ schema.set_monthly_unique(True)
258
171
  return schema
259
172
 
260
173
 
261
- def get_aum_schemas() -> Dict[str, TableSchema]:
262
- """获取所有AUM业务表结构"""
174
+ def get_xinjiang_icbc_daifa_longtail_schemas() -> Dict[str, TableSchema]:
175
+ """获取新疆工行代发长尾客户所有表结构"""
263
176
  return {
264
- 'behavior': AUMBehaviorSchema.create(),
265
- 'asset_avg': AUMAssetAvgSchema.create(),
266
- 'asset_config': AUMAssetConfigSchema.create(),
267
- 'monthly_stat': AUMMonthlyStatSchema.create()
177
+ 'daifa_longtail_behavior': XinjiangICBCDaifaLongtailBehaviorSchema.create(),
178
+ 'daifa_longtail_asset_avg': XinjiangICBCDaifaLongtailAssetAvgSchema.create(),
179
+ 'daifa_longtail_asset_config': XinjiangICBCDaifaLongtailAssetConfigSchema.create(),
180
+ 'daifa_longtail_monthly_stat': XinjiangICBCDaifaLongtailMonthlyStatSchema.create(),
268
181
  }
269
182
 
270
183
 
271
- def export_aum_docs(output_dir: str = "./docs", format_type: str = "markdown") -> Dict[str, str]:
272
- """
273
- 导出AUM表结构文档
274
-
275
- Args:
276
- output_dir: 输出目录
277
- format_type: 文档格式 ('markdown' 或 'pdf')
278
-
279
- Returns:
280
- 生成的文档文件路径字典
281
- """
282
- schemas = get_aum_schemas()
184
+ def export_xinjiang_icbc_daifa_longtail_docs(output_dir: str = "./docs") -> Dict[str, str]:
185
+ """导出新疆工行代发长尾客户表结构文档"""
283
186
  generator = SchemaDocumentGenerator()
187
+ schemas = get_xinjiang_icbc_daifa_longtail_schemas()
188
+ exported_files = {}
284
189
 
285
- results = {}
286
190
  for table_type, schema in schemas.items():
287
191
  file_path = generator.export_schema_doc(
288
- schema=schema,
289
- business_domain="AUM",
192
+ schema,
193
+ business_domain="新疆工行代发长尾客户",
290
194
  table_type=table_type,
291
- output_dir=output_dir,
292
- format_type=format_type
195
+ output_dir=output_dir
293
196
  )
294
- results[table_type] = file_path
197
+ exported_files[table_type] = file_path
295
198
 
296
- return results
199
+ return exported_files
297
200
 
298
201
 
202
+ # 导出主要组件
299
203
  __all__ = [
300
- 'AUMBehaviorSchema',
301
- 'AUMAssetAvgSchema',
302
- 'AUMAssetConfigSchema',
303
- 'AUMMonthlyStatSchema',
304
- 'get_aum_schemas',
305
- 'export_aum_docs'
204
+ 'XinjiangICBCDaifaLongtailBehaviorSchema',
205
+ 'XinjiangICBCDaifaLongtailAssetAvgSchema',
206
+ 'XinjiangICBCDaifaLongtailAssetConfigSchema',
207
+ 'XinjiangICBCDaifaLongtailMonthlyStatSchema',
208
+ 'get_xinjiang_icbc_daifa_longtail_schemas',
209
+ 'export_xinjiang_icbc_daifa_longtail_docs'
306
210
  ]
@@ -1,48 +1,70 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: staran
3
- Version: 0.5.0
3
+ Version: 0.6.1
4
4
  Summary: staran - 高性能Python工具库
5
5
  Home-page: https://github.com/starlxa/staran
6
6
  Author: StarAn
7
- Author-email: starlxa@icloud.com
7
+ Author-email: StarAn <starlxa@icloud.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/starlxa/staran
10
+ Project-URL: Bug Reports, https://github.com/starlxa/staran/issues
11
+ Project-URL: Source, https://github.com/starlxa/staran
12
+ Keywords: machine-learning,feature-engineering,data-processing,sql-generation
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
8
17
  Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3.7
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Operating System :: OS Independent
10
25
  Requires-Python: >=3.7
11
26
  Description-Content-Type: text/markdown
12
27
  License-File: LICENSE
13
- Requires-Dist: datetime
14
- Requires-Dist: calendar
15
- Requires-Dist: re
16
28
  Dynamic: author
17
- Dynamic: author-email
18
- Dynamic: classifier
19
- Dynamic: description
20
- Dynamic: description-content-type
21
29
  Dynamic: home-page
22
30
  Dynamic: license-file
23
- Dynamic: requires-dist
24
31
  Dynamic: requires-python
25
- Dynamic: summary
26
32
 
27
- # Star## ✨ v0.4.2 新特性
33
+ # Staran ✨ v0.6.1 新特性
28
34
 
29
- - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
35
+ - **完善的包管理** - 优化setup.py配置,移除不必要的标准库依赖
36
+ - 📋 **独立Schema模块** - 专门的表结构定义和管理模块
30
37
  - 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
31
38
  - 🏢 **业务域支持** - AUM等业务领域的标准表结构定义
32
- - 🔗 **无缝集成** - Schema与特征工程examples模块完美集成
39
+ - 🔗 **无缝集成** - Schema与特征工程模块完美集成
33
40
  - 🛠️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
34
41
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
35
42
  - 🎯 **继承复用架构** - TuringEngine继承SparkEngine,复用SQL生成逻辑
36
43
  - 📦 **清晰代码分离** - SQL生成与平台特定执行逻辑完全分离
37
44
  - 🚀 **易于扩展** - 新增数据库支持只需实现BaseEngine接口
38
45
  - 📁 **独立引擎存储** - engines/文件夹专门存放所有数据库引擎
39
- - 🔄 **向后兼容** - 保持对原有API的完全兼容工程工具包
46
+ - 🔄 **向后兼容** - 保持对原有API的完全兼容
47
+
48
+ ## 🎯 专为机器学习设计的Python工具包
49
+
50
+ Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
51
+
52
+ ## ✨ v0.6.1 新特性
53
+
54
+ - 🔧 **完善的包管理** - 优化setup.py配置,移除不必要的标准库依赖
55
+ - 🛠️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
56
+ - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
57
+ - 🎯 **继承复用架构** - TuringEngine继承SparkEngine,复用SQL生成逻辑
58
+ - 📦 **清晰代码分离** - SQL生成与平台特定执行逻辑完全分离
59
+ - 🚀 **易于扩展** - 新增数据库支持只需实现BaseEngine接口
60
+ - 📁 **独立引擎存储** - engines/文件夹专门存放所有数据库引擎
61
+ - 🔄 **向后兼容** - 保持对原有API的完全兼容
40
62
 
41
63
  ## 🎯 专为机器学习设计的Python工具包
42
64
 
43
65
  Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
44
66
 
45
- ## ✨ v0.4.2 新特性
67
+ ## ✨ v0.6.0 新特性
46
68
 
47
69
  - 🛠️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
48
70
  - 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
@@ -361,7 +383,7 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
361
383
 
362
384
  ```
363
385
  staran/
364
- ├── __init__.py # 主包入口,v0.4.2功能导出
386
+ ├── __init__.py # 主包入口,v0.6.0功能导出
365
387
  ├── schemas/ # 🆕 表结构定义与文档生成模块
366
388
  │ ├── __init__.py # Schema模块入口
367
389
  │ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
@@ -385,7 +407,7 @@ staran/
385
407
  │ ├── __init__.py # 工具模块
386
408
  │ └── date.py # Date类实现
387
409
  ├── setup.py # 安装配置
388
- ├── README.md # 本文档 v0.4.2
410
+ ├── README.md # 本文档 v0.6.0
389
411
  └── quick-upload.sh # 快速部署脚本
390
412
  ```
391
413
 
@@ -561,4 +583,4 @@ MIT License
561
583
 
562
584
  ---
563
585
 
564
- **Staran v0.4.2** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
586
+ **Staran v0.6.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
@@ -1,29 +1,28 @@
1
- staran/__init__.py,sha256=cBUxN-qmS_Bf8r-8k94VdA-OLvjdRZyxTNfgd-X_AE8,7234
1
+ staran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ staran/banks/__init__.py,sha256=m4IUdFLXvNtgpmxDhKptCtALyaGkF1-T9hbNqdXczLI,544
3
+ staran/banks/xinjiang_icbc/__init__.py,sha256=Ql3gQFh1h4EwUdU3fePW1chV5iAegxjLWiPxbYa_J80,3131
2
4
  staran/engines/__init__.py,sha256=aQCpDxY_JcKlApEsEp2wg_P3IwNDTCFb7OYcLHiPPmk,1627
3
5
  staran/engines/base.py,sha256=IIN-QxPsO-q3KmQ3Lz0cB9bs6Oac0Wy5MIF605HrHVw,7969
4
6
  staran/engines/hive.py,sha256=-KwZiAvK5cxwnoyYQlqGWrcZkeKhbd8QCX3chpbezd0,5894
5
7
  staran/engines/spark.py,sha256=XPxzefD9UF8oigeQISBW892RINJ9dGLbl994FWpIKBc,9361
6
8
  staran/engines/turing.py,sha256=XEKkEMMWedvaGxKQ2vEHmB3TWLNLxOu1upgiBylwqjA,15516
7
- staran/examples/__init__.py,sha256=rXjHvD_EA1sl04WAcOMGnktOwZstjUxaei6bo7pPMII,229
8
- staran/examples/aum_longtail.py,sha256=UFeLzhslS0Qw1defD9M8mI6Jq4G2BHoyqdjNfX0cgH0,9915
9
- staran/examples/aum_longtail_old.py,sha256=wZW_3NsU8lOjohtzI1ewzFIqTDAt8lnUberQJVYePfs,21723
10
9
  staran/features/__init__.py,sha256=uMloEuevUjUPfro8Yv4STwvxpSVL0J1xsQTzN_EkLpo,1828
11
10
  staran/features/engines.py,sha256=kqdS2xjmCVi0Xz1Oc3WaTMIavgAriX8F7VvUgVcpfqo,10039
12
11
  staran/features/generator.py,sha256=CI1F_PshOvokQJelsqSaVp-SNQpMc-WVmjMQKzgdeLw,23114
13
12
  staran/features/manager.py,sha256=2-3Hc3qthtyzwiuQy5QTz6RfhKK3szoylconzI3moc4,5201
14
13
  staran/features/schema.py,sha256=FwOfpTcxq4K8zkO3MFNqKPQBp_e8qY-N6gazqm9_lAQ,6067
15
- staran/models/__init__.py,sha256=NH4r6GTAz9MeUfq1jAyVkx-nC4bM78XvbWA9TuwMLik,2141
16
- staran/models/bank_configs.py,sha256=wN3GA_8cb5wevDC-sWRcJ3lMuaHahZVjC85K_t2aQt0,8177
14
+ staran/models/__init__.py,sha256=VbfrRjmnp8KlFSEZOa-buECAaERptzAnvUUZK9dpgtY,2390
17
15
  staran/models/config.py,sha256=fTbZtJq4-ZuCSSd1eW7TkIbEdDyZv2agHJCYnwOCJ_s,8886
16
+ staran/models/daifa_models.py,sha256=J7aqK41NDMDjacsjmxqwyuJfgf1kJx-Kaxj5CGQLISE,13166
18
17
  staran/models/registry.py,sha256=Zeey4TtbHtJ40odyZQzOLijyZCmlMBRuniPk_znS2Q8,10223
19
18
  staran/models/target.py,sha256=gKTTatxvOJjmE50qD6G6mhlYLuZL3Cvn3FLNbXl1eeU,10531
20
- staran/schemas/__init__.py,sha256=2RkcWCaIkrOHd37zzRCla0-jNg4cPnc6BGmmW5Vha0Y,652
21
- staran/schemas/document_generator.py,sha256=Mr7TjmKwspqxXnp9DhzZxsRx0l2Bo7MOI8mOxRtgwxU,13600
22
- staran/schemas/aum/__init__.py,sha256=jVkmJdhHGHdGE4rJ605zsRU2zIQMEHWnlgW2ZQk8AdU,13082
19
+ staran/schemas/__init__.py,sha256=ztrBlQ3irbgM7gHB_dhiLEX1ZpDX2AAWOeiPnZTe-sk,779
20
+ staran/schemas/aum/__init__.py,sha256=z0cuC6A3z-1cPKMDYrn0wCumjKkpk_0kfqGfW1JNEbc,9815
23
21
  staran/tools/__init__.py,sha256=KtudrYnxKD9HZEL4H-mrWlKrmsI3rYjJrLeC9YDTpG4,1054
24
22
  staran/tools/date.py,sha256=-QyEMWVx6czMuOIwcV7kR3gBMRVOwb5qevo7GEFSJKE,10488
25
- staran-0.5.0.dist-info/licenses/LICENSE,sha256=2EmsBIyDCono4iVXNpv5_px9qt2b7hfPq1WuyGVMNP4,1361
26
- staran-0.5.0.dist-info/METADATA,sha256=1c6403YfhOFEsZV7Ng1pe4B_wlRdp8SZypmhIH_AaVo,18809
27
- staran-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
28
- staran-0.5.0.dist-info/top_level.txt,sha256=NOUZtXSh5oSIEjHrC0lQ9WmoKtD010Q00dghWyag-Zs,7
29
- staran-0.5.0.dist-info/RECORD,,
23
+ staran/tools/document_generator.py,sha256=Mr7TjmKwspqxXnp9DhzZxsRx0l2Bo7MOI8mOxRtgwxU,13600
24
+ staran-0.6.1.dist-info/licenses/LICENSE,sha256=2EmsBIyDCono4iVXNpv5_px9qt2b7hfPq1WuyGVMNP4,1361
25
+ staran-0.6.1.dist-info/METADATA,sha256=_lLAAbTSVcGJ4hSj8DVXig4fGhgGVoYlu_YjR14IJ0M,20444
26
+ staran-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
+ staran-0.6.1.dist-info/top_level.txt,sha256=NOUZtXSh5oSIEjHrC0lQ9WmoKtD010Q00dghWyag-Zs,7
28
+ staran-0.6.1.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- """
2
- Staran Examples Module
3
- 包含各种完整的特征工程示例
4
- """
5
-
6
- from .aum_longtail import AUMLongtailExample, create_aum_example, run_aum_example
7
-
8
- __all__ = ['AUMLongtailExample', 'create_aum_example', 'run_aum_example']