staran 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- staran/__init__.py +11 -3
- staran/engines/__init__.py +65 -0
- staran/engines/base.py +255 -0
- staran/engines/hive.py +163 -0
- staran/engines/spark.py +252 -0
- staran/engines/turing.py +439 -0
- staran/examples/__init__.py +8 -0
- staran/examples/aum_longtail.py +250 -0
- staran/examples/aum_longtail_old.py +487 -0
- staran/features/__init__.py +59 -0
- staran/features/engines.py +284 -0
- staran/features/generator.py +603 -0
- staran/features/manager.py +155 -0
- staran/features/schema.py +193 -0
- staran/schemas/__init__.py +28 -0
- staran/schemas/aum/__init__.py +314 -0
- staran/schemas/document_generator.py +337 -0
- {staran-0.3.0.dist-info → staran-0.4.0.dist-info}/METADATA +57 -6
- staran-0.4.0.dist-info/RECORD +24 -0
- staran-0.3.0.dist-info/RECORD +0 -8
- {staran-0.3.0.dist-info → staran-0.4.0.dist-info}/WHEEL +0 -0
- {staran-0.3.0.dist-info → staran-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {staran-0.3.0.dist-info → staran-0.4.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: staran
|
3
|
-
Version: 0.3.0
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: staran - 高性能Python工具库
|
5
5
|
Home-page: https://github.com/starlxa/staran
|
6
6
|
Author: StarAn
|
@@ -24,13 +24,25 @@ Dynamic: requires-dist
|
|
24
24
|
Dynamic: requires-python
|
25
25
|
Dynamic: summary
|
26
26
|
|
27
|
-
#
|
27
|
+
# Star## ✨ v0.4.0 新特性
|
28
|
+
|
29
|
+
- 📋 **独立Schema模块** - 专门的表结构定义和管理模块
|
30
|
+
- 📄 **文档自动生成** - 支持Markdown/PDF/HTML格式的技术文档生成
|
31
|
+
- 🏢 **业务域支持** - AUM等业务领域的标准表结构定义
|
32
|
+
- 🔗 **无缝集成** - Schema与特征工程examples模块完美集成
|
33
|
+
- 🛠️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
|
34
|
+
- 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
|
35
|
+
- 🎯 **继承复用架构** - TuringEngine继承SparkEngine,复用SQL生成逻辑
|
36
|
+
- 📦 **清晰代码分离** - SQL生成与平台特定执行逻辑完全分离
|
37
|
+
- 🚀 **易于扩展** - 新增数据库支持只需实现BaseEngine接口
|
38
|
+
- 📁 **独立引擎存储** - engines/文件夹专门存放所有数据库引擎
|
39
|
+
- 🔄 **向后兼容** - 保持对原有API的完全兼容工程工具包
|
28
40
|
|
29
41
|
## 专为机器学习设计的Python工具包
|
30
42
|
|
31
43
|
Staran是一个强大的特征工程和数据处理工具包,提供从数据到模型的完整解决方案。特别针对工银图灵平台优化,让特征工程和模型训练变得前所未有的简单。
|
32
44
|
|
33
|
-
## ✨ v0.3.0 新特性
|
45
|
+
## ✨ v0.4.0 新特性
|
34
46
|
|
35
47
|
- 🛠️ **模块化引擎架构** - 独立的引擎模块,支持Spark、Hive、图灵平台
|
36
48
|
- 🔧 **统一接口设计** - 所有引擎提供一致的SQL生成、执行和下载接口
|
@@ -82,6 +94,40 @@ result = turing_engine.create_table('my_table', sql, execute=True)
|
|
82
94
|
download = turing_engine.download_table_data('my_table', 'file:///nfsHome/data.parquet')
|
83
95
|
```
|
84
96
|
|
97
|
+
### Schema模块 - 表结构管理与文档生成
|
98
|
+
|
99
|
+
```python
|
100
|
+
from staran import get_aum_schemas, export_aum_docs, SchemaDocumentGenerator
|
101
|
+
|
102
|
+
# 1. 获取预定义业务表结构
|
103
|
+
schemas = get_aum_schemas() # 获取AUM业务域的所有表结构
|
104
|
+
|
105
|
+
for table_type, schema in schemas.items():
|
106
|
+
print(f"{table_type}: {schema.table_name} ({len(schema.fields)}个字段)")
|
107
|
+
|
108
|
+
# 2. 生成业务文档
|
109
|
+
docs = export_aum_docs('./docs', 'markdown') # 生成Markdown格式文档
|
110
|
+
|
111
|
+
# 3. 自定义文档生成
|
112
|
+
generator = SchemaDocumentGenerator()
|
113
|
+
doc_path = generator.export_schema_doc(
|
114
|
+
schema=schemas['behavior'],
|
115
|
+
business_domain="AUM",
|
116
|
+
table_type="behavior",
|
117
|
+
format_type="markdown"
|
118
|
+
)
|
119
|
+
|
120
|
+
# 4. 与特征工程集成
|
121
|
+
from staran import create_aum_example, run_aum_example
|
122
|
+
|
123
|
+
# 基于预定义schema创建特征工程示例
|
124
|
+
example = create_aum_example()
|
125
|
+
summary = example.get_summary() # 获取特征统计信息
|
126
|
+
|
127
|
+
# 一键运行完整流程
|
128
|
+
results = run_aum_example('202507') # 生成916个特征
|
129
|
+
```
|
130
|
+
|
85
131
|
### 特征工程 - SQL自动生成
|
86
132
|
|
87
133
|
```python
|
@@ -315,7 +361,12 @@ tomorrow = date.add_days(1) # 202504 (智能处理)
|
|
315
361
|
|
316
362
|
```
|
317
363
|
staran/
|
318
|
-
├── __init__.py # 主包入口,v0.3.0功能导出
|
364
|
+
├── __init__.py # 主包入口,v0.4.0功能导出
|
365
|
+
├── schemas/ # 🆕 表结构定义与文档生成模块
|
366
|
+
│ ├── __init__.py # Schema模块入口
|
367
|
+
│ ├── document_generator.py # 文档生成器 (MD/PDF/HTML)
|
368
|
+
│ └── aum/ # AUM业务域表结构
|
369
|
+
│ └── __init__.py # AUM表结构定义
|
319
370
|
├── engines/ # 🆕 模块化引擎架构
|
320
371
|
│ ├── __init__.py # 引擎模块入口
|
321
372
|
│ ├── base.py # BaseEngine抽象基类
|
@@ -334,7 +385,7 @@ staran/
|
|
334
385
|
│ ├── __init__.py # 工具模块
|
335
386
|
│ └── date.py # Date类实现
|
336
387
|
├── setup.py # 安装配置
|
337
|
-
├── README.md # 本文档 v0.3.0
|
388
|
+
├── README.md # 本文档 v0.4.0
|
338
389
|
└── quick-upload.sh # 快速部署脚本
|
339
390
|
```
|
340
391
|
|
@@ -510,4 +561,4 @@ MIT License
|
|
510
561
|
|
511
562
|
---
|
512
563
|
|
513
|
-
**Staran v0.3.0**
|
564
|
+
**Staran v0.4.0** - 模块化引擎架构,让机器学习特征工程变得前所未有的简单 🌟
|
@@ -0,0 +1,24 @@
|
|
1
|
+
staran/__init__.py,sha256=cBUxN-qmS_Bf8r-8k94VdA-OLvjdRZyxTNfgd-X_AE8,7234
|
2
|
+
staran/engines/__init__.py,sha256=aQCpDxY_JcKlApEsEp2wg_P3IwNDTCFb7OYcLHiPPmk,1627
|
3
|
+
staran/engines/base.py,sha256=IIN-QxPsO-q3KmQ3Lz0cB9bs6Oac0Wy5MIF605HrHVw,7969
|
4
|
+
staran/engines/hive.py,sha256=-KwZiAvK5cxwnoyYQlqGWrcZkeKhbd8QCX3chpbezd0,5894
|
5
|
+
staran/engines/spark.py,sha256=XPxzefD9UF8oigeQISBW892RINJ9dGLbl994FWpIKBc,9361
|
6
|
+
staran/engines/turing.py,sha256=XEKkEMMWedvaGxKQ2vEHmB3TWLNLxOu1upgiBylwqjA,15516
|
7
|
+
staran/examples/__init__.py,sha256=rXjHvD_EA1sl04WAcOMGnktOwZstjUxaei6bo7pPMII,229
|
8
|
+
staran/examples/aum_longtail.py,sha256=UFeLzhslS0Qw1defD9M8mI6Jq4G2BHoyqdjNfX0cgH0,9915
|
9
|
+
staran/examples/aum_longtail_old.py,sha256=wZW_3NsU8lOjohtzI1ewzFIqTDAt8lnUberQJVYePfs,21723
|
10
|
+
staran/features/__init__.py,sha256=uMloEuevUjUPfro8Yv4STwvxpSVL0J1xsQTzN_EkLpo,1828
|
11
|
+
staran/features/engines.py,sha256=kqdS2xjmCVi0Xz1Oc3WaTMIavgAriX8F7VvUgVcpfqo,10039
|
12
|
+
staran/features/generator.py,sha256=CI1F_PshOvokQJelsqSaVp-SNQpMc-WVmjMQKzgdeLw,23114
|
13
|
+
staran/features/manager.py,sha256=2-3Hc3qthtyzwiuQy5QTz6RfhKK3szoylconzI3moc4,5201
|
14
|
+
staran/features/schema.py,sha256=FwOfpTcxq4K8zkO3MFNqKPQBp_e8qY-N6gazqm9_lAQ,6067
|
15
|
+
staran/schemas/__init__.py,sha256=2RkcWCaIkrOHd37zzRCla0-jNg4cPnc6BGmmW5Vha0Y,652
|
16
|
+
staran/schemas/document_generator.py,sha256=Lm9rim6yPnT1U_aStaM4KtU9eKxHnDNVfJIusQf5zQY,13120
|
17
|
+
staran/schemas/aum/__init__.py,sha256=n_DuAH3ncaScS3hPR72Eq6RtcFj0WTp3fbXHVes3WsE,16743
|
18
|
+
staran/tools/__init__.py,sha256=KtudrYnxKD9HZEL4H-mrWlKrmsI3rYjJrLeC9YDTpG4,1054
|
19
|
+
staran/tools/date.py,sha256=-QyEMWVx6czMuOIwcV7kR3gBMRVOwb5qevo7GEFSJKE,10488
|
20
|
+
staran-0.4.0.dist-info/licenses/LICENSE,sha256=2EmsBIyDCono4iVXNpv5_px9qt2b7hfPq1WuyGVMNP4,1361
|
21
|
+
staran-0.4.0.dist-info/METADATA,sha256=doZ3FJjMqxkOgqYKz74dwlJk4ICd4I0P38Qv48YSJG4,18809
|
22
|
+
staran-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
23
|
+
staran-0.4.0.dist-info/top_level.txt,sha256=NOUZtXSh5oSIEjHrC0lQ9WmoKtD010Q00dghWyag-Zs,7
|
24
|
+
staran-0.4.0.dist-info/RECORD,,
|
staran-0.3.0.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
staran/__init__.py,sha256=ia3Hu8bJDy9_MMv1otR659qxvtxsEWQ-JceSLxdiY3g,6945
|
2
|
-
staran/tools/__init__.py,sha256=KtudrYnxKD9HZEL4H-mrWlKrmsI3rYjJrLeC9YDTpG4,1054
|
3
|
-
staran/tools/date.py,sha256=-QyEMWVx6czMuOIwcV7kR3gBMRVOwb5qevo7GEFSJKE,10488
|
4
|
-
staran-0.3.0.dist-info/licenses/LICENSE,sha256=2EmsBIyDCono4iVXNpv5_px9qt2b7hfPq1WuyGVMNP4,1361
|
5
|
-
staran-0.3.0.dist-info/METADATA,sha256=jaYgW3F7ZpDcoJSDG0VA9jhEEqAJ3wuSfhqRv9UFvKs,16573
|
6
|
-
staran-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
7
|
-
staran-0.3.0.dist-info/top_level.txt,sha256=NOUZtXSh5oSIEjHrC0lQ9WmoKtD010Q00dghWyag-Zs,7
|
8
|
-
staran-0.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|