yihuier 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. yihuier-0.2.0/.claude/skills/risk-modeling.skill +0 -0
  2. yihuier-0.2.0/.github/workflows/deploy-docs.yml +58 -0
  3. yihuier-0.2.0/.gitignore +43 -0
  4. yihuier-0.2.0/.python-version +1 -0
  5. yihuier-0.2.0/CHANGELOG.md +104 -0
  6. yihuier-0.2.0/LICENSE +21 -0
  7. yihuier-0.2.0/PKG-INFO +156 -0
  8. yihuier-0.2.0/PYPI_PUBLISHING.md +138 -0
  9. yihuier-0.2.0/README.md +114 -0
  10. yihuier-0.2.0/Scorecard--Function/EDA/345/210/206/346/236/220.py +142 -0
  11. yihuier-0.2.0/Scorecard--Function/README.md +78 -0
  12. yihuier-0.2.0/Scorecard--Function//345/217/230/351/207/217woe/347/246/273/346/225/243/345/214/226.py +55 -0
  13. yihuier-0.2.0/Scorecard--Function//345/217/230/351/207/217/345/210/206/347/256/261.py +467 -0
  14. yihuier-0.2.0/Scorecard--Function//345/217/230/351/207/217/347/255/233/351/200/211.py +173 -0
  15. yihuier-0.2.0/Scorecard--Function//346/225/260/346/215/256/351/242/204/345/244/204/347/220/206.py +187 -0
  16. yihuier-0.2.0/Scorecard--Function//346/250/241/345/236/213/350/257/204/344/274/260.py +153 -0
  17. yihuier-0.2.0/Scorecard--Function//350/257/204/345/210/206/345/215/241/345/256/236/347/216/260/345/222/214/350/257/204/344/274/260.py +302 -0
  18. yihuier-0.2.0/Scorecard--Function//350/257/204/345/210/206/345/215/241/347/233/221/346/216/247.py +153 -0
  19. yihuier-0.2.0/docs/.npmrc +2 -0
  20. yihuier-0.2.0/docs/.vitepress/cache/deps/@braintree_sanitize-url.js +92 -0
  21. yihuier-0.2.0/docs/.vitepress/cache/deps/@braintree_sanitize-url.js.map +7 -0
  22. yihuier-0.2.0/docs/.vitepress/cache/deps/@theme_index.js +276 -0
  23. yihuier-0.2.0/docs/.vitepress/cache/deps/@theme_index.js.map +7 -0
  24. yihuier-0.2.0/docs/.vitepress/cache/deps/_metadata.json +91 -0
  25. yihuier-0.2.0/docs/.vitepress/cache/deps/chunk-BUSYA2B4.js +9 -0
  26. yihuier-0.2.0/docs/.vitepress/cache/deps/chunk-BUSYA2B4.js.map +7 -0
  27. yihuier-0.2.0/docs/.vitepress/cache/deps/chunk-OL6HMLMB.js +13018 -0
  28. yihuier-0.2.0/docs/.vitepress/cache/deps/chunk-OL6HMLMB.js.map +7 -0
  29. yihuier-0.2.0/docs/.vitepress/cache/deps/chunk-T3Q5HG2B.js +9719 -0
  30. yihuier-0.2.0/docs/.vitepress/cache/deps/chunk-T3Q5HG2B.js.map +7 -0
  31. yihuier-0.2.0/docs/.vitepress/cache/deps/cytoscape-cose-bilkent.js +4710 -0
  32. yihuier-0.2.0/docs/.vitepress/cache/deps/cytoscape-cose-bilkent.js.map +7 -0
  33. yihuier-0.2.0/docs/.vitepress/cache/deps/cytoscape.js +30285 -0
  34. yihuier-0.2.0/docs/.vitepress/cache/deps/cytoscape.js.map +7 -0
  35. yihuier-0.2.0/docs/.vitepress/cache/deps/dayjs.js +285 -0
  36. yihuier-0.2.0/docs/.vitepress/cache/deps/dayjs.js.map +7 -0
  37. yihuier-0.2.0/docs/.vitepress/cache/deps/debug.js +468 -0
  38. yihuier-0.2.0/docs/.vitepress/cache/deps/debug.js.map +7 -0
  39. yihuier-0.2.0/docs/.vitepress/cache/deps/package.json +3 -0
  40. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___@vue_devtools-api.js +4507 -0
  41. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___@vue_devtools-api.js.map +7 -0
  42. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___@vueuse_core.js +584 -0
  43. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___@vueuse_core.js.map +7 -0
  44. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___@vueuse_integrations_useFocusTrap.js +1353 -0
  45. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___@vueuse_integrations_useFocusTrap.js.map +7 -0
  46. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___mark__js_src_vanilla__js.js +1667 -0
  47. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___mark__js_src_vanilla__js.js.map +7 -0
  48. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___minisearch.js +1815 -0
  49. yihuier-0.2.0/docs/.vitepress/cache/deps/vitepress___minisearch.js.map +7 -0
  50. yihuier-0.2.0/docs/.vitepress/cache/deps/vue.js +348 -0
  51. yihuier-0.2.0/docs/.vitepress/cache/deps/vue.js.map +7 -0
  52. yihuier-0.2.0/docs/.vitepress/config.mts +127 -0
  53. yihuier-0.2.0/docs/DOCS_README.md +116 -0
  54. yihuier-0.2.0/docs/README.md +167 -0
  55. yihuier-0.2.0/docs/develop/architecture.md +468 -0
  56. yihuier-0.2.0/docs/develop/changelog.md +171 -0
  57. yihuier-0.2.0/docs/develop/contributing.md +445 -0
  58. yihuier-0.2.0/docs/develop/index.md +71 -0
  59. yihuier-0.2.0/docs/guide/advanced-examples.md +751 -0
  60. yihuier-0.2.0/docs/guide/api.md +574 -0
  61. yihuier-0.2.0/docs/guide/best-practices.md +477 -0
  62. yihuier-0.2.0/docs/guide/concepts/evaluation-metrics.md +354 -0
  63. yihuier-0.2.0/docs/guide/concepts/index.md +28 -0
  64. yihuier-0.2.0/docs/guide/concepts/scorecard-basics.md +286 -0
  65. yihuier-0.2.0/docs/guide/concepts/woe-iv.md +258 -0
  66. yihuier-0.2.0/docs/guide/examples.md +447 -0
  67. yihuier-0.2.0/docs/guide/installation.md +252 -0
  68. yihuier-0.2.0/docs/guide/intro.md +232 -0
  69. yihuier-0.2.0/docs/guide/modules/binning.md +828 -0
  70. yihuier-0.2.0/docs/guide/modules/cluster.md +487 -0
  71. yihuier-0.2.0/docs/guide/modules/data-processing.md +607 -0
  72. yihuier-0.2.0/docs/guide/modules/eda.md +511 -0
  73. yihuier-0.2.0/docs/guide/modules/index.md +53 -0
  74. yihuier-0.2.0/docs/guide/modules/model-evaluation.md +647 -0
  75. yihuier-0.2.0/docs/guide/modules/pipeline.md +561 -0
  76. yihuier-0.2.0/docs/guide/modules/scorecard-implement.md +993 -0
  77. yihuier-0.2.0/docs/guide/modules/scorecard-monitor.md +648 -0
  78. yihuier-0.2.0/docs/guide/modules/var-select.md +793 -0
  79. yihuier-0.2.0/docs/guide/quick-start.md +344 -0
  80. yihuier-0.2.0/docs/guide/skill.md +248 -0
  81. yihuier-0.2.0/docs/index.md +142 -0
  82. yihuier-0.2.0/docs/package-lock.json +3800 -0
  83. yihuier-0.2.0/docs/package.json +14 -0
  84. yihuier-0.2.0/examples/advanced_pipeline.py +311 -0
  85. yihuier-0.2.0/examples/basic_usage.py +198 -0
  86. yihuier-0.2.0/pyproject.toml +115 -0
  87. yihuier-0.2.0/requirements.txt +10 -0
  88. yihuier-0.2.0/scripts/publish.sh +52 -0
  89. yihuier-0.2.0/scripts/test_install.sh +64 -0
  90. yihuier-0.2.0/setup.py +52 -0
  91. yihuier-0.2.0/tests/__init__.py +0 -0
  92. yihuier-0.2.0/tests/conftest.py +59 -0
  93. yihuier-0.2.0/tests/test_binning.py +121 -0
  94. yihuier-0.2.0/tests/test_cluster.py +0 -0
  95. yihuier-0.2.0/tests/test_data_processing.py +120 -0
  96. yihuier-0.2.0/tests/test_eda.py +184 -0
  97. yihuier-0.2.0/tests/test_integration.py +15 -0
  98. yihuier-0.2.0/tests/test_model_evaluation.py +193 -0
  99. yihuier-0.2.0/tests/test_scorecard_implement.py +400 -0
  100. yihuier-0.2.0/tests/test_scorecard_monitor.py +0 -0
  101. yihuier-0.2.0/tests/test_var_select.py +144 -0
  102. yihuier-0.2.0/uv.lock +1418 -0
  103. yihuier-0.2.0/yihuier/__init__.py +29 -0
  104. yihuier-0.2.0/yihuier/binning.py +720 -0
  105. yihuier-0.2.0/yihuier/binning_function.py +565 -0
  106. yihuier-0.2.0/yihuier/cluster.py +304 -0
  107. yihuier-0.2.0/yihuier/constants.py +75 -0
  108. yihuier-0.2.0/yihuier/data_processing.py +248 -0
  109. yihuier-0.2.0/yihuier/eda.py +266 -0
  110. yihuier-0.2.0/yihuier/frame.py +61 -0
  111. yihuier-0.2.0/yihuier/model_evaluation.py +223 -0
  112. yihuier-0.2.0/yihuier/pipeline.py +89 -0
  113. yihuier-0.2.0/yihuier/scorecard_implement.py +336 -0
  114. yihuier-0.2.0/yihuier/scorecard_monitor.py +201 -0
  115. yihuier-0.2.0/yihuier/test.py +32 -0
  116. yihuier-0.2.0/yihuier/third_platform_data_evaluation.py +91 -0
  117. yihuier-0.2.0/yihuier/var_select.py +482 -0
  118. yihuier-0.2.0/yihuier/yihuier.py +119 -0
@@ -0,0 +1,58 @@
1
+ name: Deploy Docs
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - 'docs/**'
9
+ workflow_dispatch:
10
+
11
+ permissions:
12
+ contents: read
13
+ pages: write
14
+ id-token: write
15
+
16
+ concurrency:
17
+ group: "pages"
18
+ cancel-in-progress: false
19
+
20
+ jobs:
21
+ build:
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - name: Checkout
25
+ uses: actions/checkout@v4
26
+
27
+ - name: Setup Node.js
28
+ uses: actions/setup-node@v4
29
+ with:
30
+ node-version: '20'
31
+ cache: 'npm'
32
+ cache-dependency-path: docs/package-lock.json
33
+
34
+ - name: Install dependencies
35
+ run: |
36
+ cd docs
37
+ npm ci
38
+
39
+ - name: Build
40
+ run: |
41
+ cd docs
42
+ npm run docs:build
43
+
44
+ - name: Upload artifact
45
+ uses: actions/upload-pages-artifact@v3
46
+ with:
47
+ path: docs/.vitepress/dist
48
+
49
+ deploy:
50
+ environment:
51
+ name: github-pages
52
+ url: ${{ steps.deployment.outputs.page_url }}
53
+ runs-on: ubuntu-latest
54
+ needs: build
55
+ steps:
56
+ - name: Deploy to GitHub Pages
57
+ id: deployment
58
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,43 @@
1
+ .DS_Store
2
+ *.pyc
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+ *.so
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+ .pytest_cache/
25
+ .coverage
26
+ htmlcov/
27
+ .tox/
28
+ .env
29
+ .venv
30
+ env/
31
+ venv/
32
+ ENV/
33
+ env.bak/
34
+ venv.bak/
35
+ .vscode/
36
+ .idea/
37
+ *.swp
38
+ *.swo
39
+ *~
40
+ .ipynb_checkpoints
41
+ data/*
42
+
43
+ docs/node_modules
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,104 @@
1
+ # 更新日志
2
+
3
+ 本文档记录 Yihuier 的所有重要变更。
4
+
5
+ 格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/),
6
+ 版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。
7
+
8
+ ## [0.2.0] - 2026-04-23
9
+
10
+ ### 新增
11
+ - 🤖 **AI 智能建模 Skill**:内置专业的风控建模 Skill,让 AI 助手自动引导完成 10 步建模全流程
12
+ - 自动触发:当询问信用评分卡建模时自动激活
13
+ - 质量保证:内置 AUC ≥ 0.65、KS ≥ 0.15、PSI < 0.25 质量标准
14
+ - 智能诊断:自动检测问题并提供优化建议
15
+ - 📚 **完整的 VitePress 文档网站**
16
+ - 模块化文档结构(指南、概念、API、示例)
17
+ - GitHub Pages 自动部署
18
+ - Mermaid 图表支持
19
+ - 搜索功能
20
+ - ✅ **完善的测试框架**
21
+ - 61 个单元测试,核心功能全覆盖
22
+ - pytest 配置和 CI 集成
23
+ - 测试覆盖率报告
24
+ - 🔧 **类型提示和代码规范化**
25
+ - 完整的类型注解(Python 3.13+)
26
+ - ruff 代码格式化和检查
27
+ - 统一的代码风格
28
+
29
+ ### 修复
30
+ - 修复文档中的死链问题(18 个)
31
+ - 修复绘图函数返回值类型不一致
32
+ - 修复分箱边界处理逻辑
33
+ - 修复数据预处理模块中的常变量删除逻辑
34
+ - 修复 XGBoost API 变更导致的兼容性问题
35
+
36
+ ### 文档
37
+ - 新增快速开始指南
38
+ - 新增 API 参考文档
39
+ - 新增最佳实践指南
40
+ - 新增高级示例集合
41
+ - 新增 AI Skill 专题文档
42
+ - 新增架构设计文档
43
+
44
+ ### 改进
45
+ - 优化项目结构和模块组织
46
+ - 改进错误处理和异常提示
47
+ - 增强 API 一致性和易用性
48
+ - 提升代码可维护性
49
+
50
+ ### 技术栈
51
+ - Python 3.13+
52
+ - pandas >= 2.1.4
53
+ - scikit-learn >= 1.3.2
54
+ - xgboost >= 2.0.3
55
+ - matplotlib >= 3.8.2
56
+ - seaborn >= 0.12.2
57
+
58
+ ## [0.1.13] - 2024-XX-XX
59
+
60
+ ### 新增
61
+ - 🎉 首次发布 Yihuier 评分卡建模工具包
62
+ - 完整的面向对象建模架构
63
+ - 9 个核心模块:EDA、数据预处理、分箱、变量选择、模型评估、评分卡实现、评分卡监控、聚类、流水线
64
+ - 支持多种分箱方法:ChiMerge、等频、等距、单调性分箱
65
+ - 完整的 WOE 转换和 IV 计算
66
+ - 多种变量选择策略:XGBoost、随机森林、相关性筛选
67
+ - 评分卡刻度计算和分数转换
68
+ - PSI 稳定性分析和模型监控
69
+ - 完整的类型提示(Python 3.13+)
70
+ - 61 个单元测试,核心功能全覆盖
71
+ - AI 智能建模 Skill(Claude Code 支持)
72
+
73
+ ### 文档
74
+ - 完整的 VitePress 文档站点
75
+ - 快速开始指南
76
+ - API 参考文档
77
+ - 最佳实践指南
78
+ - 高级示例集合
79
+ - 模块化文档结构
80
+
81
+ ### 技术栈
82
+ - Python 3.13+
83
+ - pandas >= 2.1.4
84
+ - scikit-learn >= 1.3.2
85
+ - xgboost >= 2.0.3
86
+ - matplotlib >= 3.8.2
87
+ - seaborn >= 0.12.2
88
+
89
+ ## [未发布]
90
+
91
+ ### 计划中
92
+ - 更多分箱方法(决策树分箱、最优分箱)
93
+ - 自动化特征工程模块
94
+ - 模型对比和自动选择
95
+ - 更多评估指标和可视化
96
+ - GPU 加速支持
97
+ - 分布式计算支持
98
+
99
+ ---
100
+
101
+ ## 版本说明
102
+
103
+ - **[0.1.13]** - 初始发布版本,包含完整的评分卡建模功能
104
+ - **[未发布]** - 计划中的功能和改进
yihuier-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Justin Gao
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
yihuier-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: yihuier
3
+ Version: 0.2.0
4
+ Summary: 评分卡模型实现函数模块 - 轻松解决逻辑回归建模
5
+ Project-URL: Homepage, https://github.com/encyc/yihuier
6
+ Project-URL: Repository, https://github.com/encyc/yihuier
7
+ Project-URL: Documentation, https://encyc.github.io/yihuier/
8
+ Project-URL: Issues, https://github.com/encyc/yihuier/issues
9
+ Author: Justin Gao
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: credit-scoring,logistic-regression,machine-learning,scorecard
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Financial and Insurance Industry
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.13
21
+ Requires-Dist: matplotlib>=3.8.2
22
+ Requires-Dist: numpy>=1.26.0
23
+ Requires-Dist: pandas>=2.1.4
24
+ Requires-Dist: scikit-learn>=1.3.2
25
+ Requires-Dist: scipy>=1.11.0
26
+ Requires-Dist: seaborn>=0.12.2
27
+ Requires-Dist: statsmodels>=0.14.0
28
+ Requires-Dist: tqdm>=4.66.1
29
+ Requires-Dist: xgboost>=2.0.3
30
+ Provides-Extra: all
31
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'all'
32
+ Requires-Dist: pytest>=7.0.0; extra == 'all'
33
+ Requires-Dist: ruff>=0.1.0; extra == 'all'
34
+ Requires-Dist: ydata-profiling>=4.6.0; extra == 'all'
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
37
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
38
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
39
+ Provides-Extra: profiling
40
+ Requires-Dist: ydata-profiling>=4.6.0; extra == 'profiling'
41
+ Description-Content-Type: text/markdown
42
+
43
+ # Yihuier
44
+
45
+ 一会儿轻松解决信用评分卡建模
46
+
47
+ 基于 [Scorecard--Function](https://github.com/taenggu0309/Scorecard--Function) 重构的面向对象版本。
48
+
49
+ ## 特性
50
+
51
+ - **面向对象设计** - 统一的 Yihuier 类管理数据和状态
52
+ - **完整建模流程** - EDA → 数据处理 → 分箱 → 变量选择 → 模型评估 → 评分卡实现 → 监控
53
+ - **模块化架构** - 9个独立模块,职责清晰
54
+ - **类型提示** - 完整的类型注解,更好的IDE支持
55
+
56
+ ## 快速开始
57
+
58
+ ### 安装
59
+
60
+ ```bash
61
+ # 使用 pip
62
+ pip install yihuier
63
+
64
+ # 或使用 uv
65
+ uv pip install yihuier
66
+ ```
67
+
68
+ ### 基础使用
69
+
70
+ ```python
71
+ import pandas as pd
72
+ from yihuier import Yihuier
73
+
74
+ # 加载数据
75
+ data = pd.read_csv('data.csv')
76
+ yh = Yihuier(data, target='dlq_flag')
77
+
78
+ # 数据预处理
79
+ data_clean = yh.dp_module.delete_missing_var(threshold=0.15)
80
+
81
+ # 变量分箱
82
+ bin_df, iv_value = yh.binning_module.binning_num(
83
+ col_list=['v1', 'v2', 'v3'],
84
+ max_bin=5,
85
+ method='ChiMerge'
86
+ )
87
+
88
+ # WOE转换
89
+ woe_df = yh.binning_module.woe_df_concat()
90
+ data_woe = yh.binning_module.woe_transform()
91
+
92
+ # 变量选择
93
+ xg_imp, xg_rank, xg_cols = yh.var_select_module.select_xgboost(
94
+ col_list=data_woe.drop(['dlq_flag'], axis=1).columns.tolist(),
95
+ imp_num=10
96
+ )
97
+
98
+ # 模型训练
99
+ from sklearn.linear_model import LogisticRegression
100
+ model = LogisticRegression()
101
+ model.fit(data_woe[xg_cols], data_woe['dlq_flag'])
102
+
103
+ # 模型评估(x_test、y_test 为预先划分并做过相同 WOE 转换的测试集,此处未展示)
104
+ y_pred = model.predict_proba(x_test)[:, 1]
105
+ yh.me_module.plot_roc(y_test, y_pred)
106
+ yh.me_module.plot_model_ks(y_test, y_pred)
107
+ ```
108
+
109
+ ## 模块概览
110
+
111
+ | 模块 | 功能 | 文档 |
112
+ |------|------|------|
113
+ | `EDAModule` | 探索性数据分析 | [📖 EDA 模块](https://encyc.github.io/yihuier/guide/modules/eda.html) |
114
+ | `DataProcessingModule` | 数据预处理 | [📖 数据预处理](https://encyc.github.io/yihuier/guide/modules/data-processing.html) |
115
+ | `BinningModule` | 变量分箱 | [📖 分箱模块](https://encyc.github.io/yihuier/guide/modules/binning.html) |
116
+ | `VarSelectModule` | 变量选择 | [📖 变量选择](https://encyc.github.io/yihuier/guide/modules/var-select.html) |
117
+ | `ModelEvaluationModule` | 模型评估 | [📖 模型评估](https://encyc.github.io/yihuier/guide/modules/model-evaluation.html) |
118
+ | `ScorecardImplementModule` | 评分卡实现 | [📖 评分卡实现](https://encyc.github.io/yihuier/guide/modules/scorecard-implement.html) |
119
+ | `ScorecardMonitorModule` | 评分卡监控 | [📖 评分卡监控](https://encyc.github.io/yihuier/guide/modules/scorecard-monitor.html) |
120
+ | `ClusterModule` | 聚类分析 | [📖 聚类模块](https://encyc.github.io/yihuier/guide/modules/cluster.html) |
121
+ | `PipelineModule` | 流水线 | [📖 流水线模块](https://encyc.github.io/yihuier/guide/modules/pipeline.html) |
122
+
123
+ ## 📚 完整文档
124
+
125
+ - **[在线文档](https://encyc.github.io/yihuier/)** - VitePress 部署的完整文档网站
126
+ - **[快速开始](https://encyc.github.io/yihuier/guide/quick-start)** - 5分钟快速上手
127
+ - **[API 参考](https://encyc.github.io/yihuier/guide/api)** - 完整的 API 文档
128
+ - **[最佳实践](https://encyc.github.io/yihuier/guide/best-practices)** - 行业最佳实践指南
129
+ - **[示例集合](https://encyc.github.io/yihuier/guide/examples)** - 常用场景代码示例
130
+
131
+ ## 开发
132
+
133
+ ```bash
134
+ # 克隆项目
135
+ git clone https://github.com/encyc/yihuier.git
136
+ cd yihuier
137
+
138
+ # 安装开发依赖
139
+ uv pip install -e ".[dev]"
140
+
141
+ # 运行测试
142
+ pytest tests/ -v
143
+
144
+ # 代码格式化
145
+ ruff format yihuier/
146
+ ruff check yihuier/
147
+ ```
148
+
149
+ ## 许可证
150
+
151
+ MIT License
152
+
153
+ ## 致谢
154
+
155
+ - 原项目: [Scorecard--Function](https://github.com/taenggu0309/Scorecard--Function)
156
+ - 相关文章: [知乎专栏](https://zhuanlan.zhihu.com/p/675830391)
@@ -0,0 +1,138 @@
1
+ # PyPI 发布指南
2
+
3
+ ## 发布前检查清单
4
+
5
+ ### ✅ 已完成
6
+
7
+ - [x] pyproject.toml 配置完整
8
+ - [x] README.md 内容完善
9
+ - [x] LICENSE 文件创建(MIT)
10
+ - [x] 版本号定义(0.1.0)
11
+ - [x] 依赖声明完整
12
+ - [x] 构建工具安装(build, twine)
13
+ - [x] 包构建测试通过
14
+ - [x] twine check 验证通过
15
+
16
+ ### 📦 构建产物
17
+
18
+ ```bash
19
+ dist/
20
+ ├── yihuier-0.1.0-py3-none-any.whl (42KB)
21
+ └── yihuier-0.1.0.tar.gz (1.5MB)
22
+ ```
23
+
24
+ ## 发布步骤
25
+
26
+ ### 1. 配置 PyPI API Token
27
+
28
+ ```bash
29
+ # 创建 ~/.pypirc
30
+ cat > ~/.pypirc << 'EOF'
31
+ [distutils]
32
+ index-servers =
33
+ pypi
34
+ testpypi
35
+
36
+ [pypi]
37
+ username = __token__
38
+ password = <your-pypi-token>
39
+
40
+ [testpypi]
41
+ username = __token__
42
+ password = <your-testpypi-token>
43
+ EOF
44
+ ```
45
+
46
+ **获取 Token**:
47
+ - 访问 https://pypi.org/manage/account/token/
48
+ - 创建新的 API token
49
+ - 将 token 粘贴到 ~/.pypirc 中
50
+
51
+ ### 2. 测试发布(推荐)
52
+
53
+ 先发布到 TestPyPI 验证:
54
+
55
+ ```bash
56
+ # 发布到 TestPyPI
57
+ python -m twine upload --repository testpypi dist/*
58
+
59
+ # 测试安装
60
+ pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ yihuier
61
+ ```
62
+
63
+ ### 3. 正式发布
64
+
65
+ ```bash
66
+ # 发布到 PyPI
67
+ python -m twine upload dist/*
68
+ ```
69
+
70
+ ### 4. 验证发布
71
+
72
+ ```bash
73
+ # 等待 1-2 分钟后
74
+ pip install yihuier
75
+
76
+ # 验证安装
77
+ python -c "from yihuier import Yihuier; print(Yihuier.__doc__)"
78
+ ```
79
+
80
+ ## 发布后注意事项
81
+
82
+ 1. **版本管理**:下次发布前更新版本号
83
+ ```toml
84
+ # pyproject.toml
85
+ version = "0.1.1" # 或 0.2.0, 1.0.0 等
86
+ ```
87
+
88
+ 2. **Git 标签**:为每个发布版本打标签
89
+ ```bash
90
+ git tag v0.1.0
91
+ git push origin v0.1.0
92
+ ```
93
+
94
+ 3. **GitHub Release**:在 GitHub 创建 Release
95
+ - 上传构建的 dist 文件
96
+ - 添加更新日志
97
+
98
+ 4. **文档更新**:确保文档站点版本号一致
99
+
100
+ ## 常见问题
101
+
102
+ ### Q: 包名已被占用怎么办?
103
+
104
+ A: 在 pyproject.toml 中修改包名:
105
+ ```toml
106
+ name = "yihuier-scorecard" # 或其他可用名称
107
+ ```
108
+
109
+ ### Q: 上传失败怎么办?
110
+
111
+ A: 检查:
112
+ 1. Token 是否正确
113
+ 2. 版本号是否已存在(不能重复上传相同版本)
114
+ 3. 网络连接是否正常
115
+
116
+ ### Q: 如何撤销已发布的包?
117
+
118
+ A: PyPI 允许在项目管理页面删除已发布的版本,但已使用过的文件名和版本号不能重新上传;更推荐撤回(yank)有问题的版本,此外还可以:
119
+ 1. 发布新版本修复问题
120
+ 2. 联系 PyPI 管理员(紧急情况)
121
+
122
+ ## 版本号规范
123
+
124
+ 遵循语义化版本(Semantic Versioning):
125
+
126
+ - **MAJOR.MINOR.PATCH**(如 1.2.3)
127
+ - **MAJOR**:不兼容的 API 变更
128
+ - **MINOR**:向后兼容的功能新增
129
+ - **PATCH**:向后兼容的问题修复
130
+
131
+ 首次发布建议:**0.1.0** 或 **1.0.0**
132
+
133
+ ## 后续改进
134
+
135
+ - [ ] 添加 GitHub Actions 自动发布
136
+ - [ ] 配置 CI/CD 自动测试
137
+ - [ ] 添加更多示例代码
138
+ - [ ] 完善文档和教程
@@ -0,0 +1,114 @@
1
+ # Yihuier
2
+
3
+ 一会儿轻松解决信用评分卡建模
4
+
5
+ 基于 [Scorecard--Function](https://github.com/taenggu0309/Scorecard--Function) 重构的面向对象版本。
6
+
7
+ ## 特性
8
+
9
+ - **面向对象设计** - 统一的 Yihuier 类管理数据和状态
10
+ - **完整建模流程** - EDA → 数据处理 → 分箱 → 变量选择 → 模型评估 → 评分卡实现 → 监控
11
+ - **模块化架构** - 9个独立模块,职责清晰
12
+ - **类型提示** - 完整的类型注解,更好的IDE支持
13
+
14
+ ## 快速开始
15
+
16
+ ### 安装
17
+
18
+ ```bash
19
+ # 使用 pip
20
+ pip install yihuier
21
+
22
+ # 或使用 uv
23
+ uv pip install yihuier
24
+ ```
25
+
26
+ ### 基础使用
27
+
28
+ ```python
29
+ import pandas as pd
30
+ from yihuier import Yihuier
31
+
32
+ # 加载数据
33
+ data = pd.read_csv('data.csv')
34
+ yh = Yihuier(data, target='dlq_flag')
35
+
36
+ # 数据预处理
37
+ data_clean = yh.dp_module.delete_missing_var(threshold=0.15)
38
+
39
+ # 变量分箱
40
+ bin_df, iv_value = yh.binning_module.binning_num(
41
+ col_list=['v1', 'v2', 'v3'],
42
+ max_bin=5,
43
+ method='ChiMerge'
44
+ )
45
+
46
+ # WOE转换
47
+ woe_df = yh.binning_module.woe_df_concat()
48
+ data_woe = yh.binning_module.woe_transform()
49
+
50
+ # 变量选择
51
+ xg_imp, xg_rank, xg_cols = yh.var_select_module.select_xgboost(
52
+ col_list=data_woe.drop(['dlq_flag'], axis=1).columns.tolist(),
53
+ imp_num=10
54
+ )
55
+
56
+ # 模型训练
57
+ from sklearn.linear_model import LogisticRegression
58
+ model = LogisticRegression()
59
+ model.fit(data_woe[xg_cols], data_woe['dlq_flag'])
60
+
61
+ # 模型评估(x_test、y_test 为预先划分并做过相同 WOE 转换的测试集,此处未展示)
62
+ y_pred = model.predict_proba(x_test)[:, 1]
63
+ yh.me_module.plot_roc(y_test, y_pred)
64
+ yh.me_module.plot_model_ks(y_test, y_pred)
65
+ ```
66
+
67
+ ## 模块概览
68
+
69
+ | 模块 | 功能 | 文档 |
70
+ |------|------|------|
71
+ | `EDAModule` | 探索性数据分析 | [📖 EDA 模块](https://encyc.github.io/yihuier/guide/modules/eda.html) |
72
+ | `DataProcessingModule` | 数据预处理 | [📖 数据预处理](https://encyc.github.io/yihuier/guide/modules/data-processing.html) |
73
+ | `BinningModule` | 变量分箱 | [📖 分箱模块](https://encyc.github.io/yihuier/guide/modules/binning.html) |
74
+ | `VarSelectModule` | 变量选择 | [📖 变量选择](https://encyc.github.io/yihuier/guide/modules/var-select.html) |
75
+ | `ModelEvaluationModule` | 模型评估 | [📖 模型评估](https://encyc.github.io/yihuier/guide/modules/model-evaluation.html) |
76
+ | `ScorecardImplementModule` | 评分卡实现 | [📖 评分卡实现](https://encyc.github.io/yihuier/guide/modules/scorecard-implement.html) |
77
+ | `ScorecardMonitorModule` | 评分卡监控 | [📖 评分卡监控](https://encyc.github.io/yihuier/guide/modules/scorecard-monitor.html) |
78
+ | `ClusterModule` | 聚类分析 | [📖 聚类模块](https://encyc.github.io/yihuier/guide/modules/cluster.html) |
79
+ | `PipelineModule` | 流水线 | [📖 流水线模块](https://encyc.github.io/yihuier/guide/modules/pipeline.html) |
80
+
81
+ ## 📚 完整文档
82
+
83
+ - **[在线文档](https://encyc.github.io/yihuier/)** - VitePress 部署的完整文档网站
84
+ - **[快速开始](https://encyc.github.io/yihuier/guide/quick-start)** - 5分钟快速上手
85
+ - **[API 参考](https://encyc.github.io/yihuier/guide/api)** - 完整的 API 文档
86
+ - **[最佳实践](https://encyc.github.io/yihuier/guide/best-practices)** - 行业最佳实践指南
87
+ - **[示例集合](https://encyc.github.io/yihuier/guide/examples)** - 常用场景代码示例
88
+
89
+ ## 开发
90
+
91
+ ```bash
92
+ # 克隆项目
93
+ git clone https://github.com/encyc/yihuier.git
94
+ cd yihuier
95
+
96
+ # 安装开发依赖
97
+ uv pip install -e ".[dev]"
98
+
99
+ # 运行测试
100
+ pytest tests/ -v
101
+
102
+ # 代码格式化
103
+ ruff format yihuier/
104
+ ruff check yihuier/
105
+ ```
106
+
107
+ ## 许可证
108
+
109
+ MIT License
110
+
111
+ ## 致谢
112
+
113
+ - 原项目: [Scorecard--Function](https://github.com/taenggu0309/Scorecard--Function)
114
+ - 相关文章: [知乎专栏](https://zhuanlan.zhihu.com/p/675830391)