sqlseed 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlseed-0.1.0/.gitignore +69 -0
- sqlseed-0.1.0/CHANGELOG.md +80 -0
- sqlseed-0.1.0/LICENSE +17 -0
- sqlseed-0.1.0/PKG-INFO +934 -0
- sqlseed-0.1.0/README.md +882 -0
- sqlseed-0.1.0/pyproject.toml +121 -0
- sqlseed-0.1.0/src/sqlseed/__init__.py +121 -0
- sqlseed-0.1.0/src/sqlseed/_utils/__init__.py +11 -0
- sqlseed-0.1.0/src/sqlseed/_utils/logger.py +30 -0
- sqlseed-0.1.0/src/sqlseed/_utils/metrics.py +45 -0
- sqlseed-0.1.0/src/sqlseed/_utils/progress.py +14 -0
- sqlseed-0.1.0/src/sqlseed/_utils/schema_helpers.py +51 -0
- sqlseed-0.1.0/src/sqlseed/_utils/sql_safe.py +45 -0
- sqlseed-0.1.0/src/sqlseed/_version.py +1 -0
- sqlseed-0.1.0/src/sqlseed/cli/__init__.py +3 -0
- sqlseed-0.1.0/src/sqlseed/cli/main.py +316 -0
- sqlseed-0.1.0/src/sqlseed/config/__init__.py +14 -0
- sqlseed-0.1.0/src/sqlseed/config/loader.py +66 -0
- sqlseed-0.1.0/src/sqlseed/config/models.py +99 -0
- sqlseed-0.1.0/src/sqlseed/config/snapshot.py +91 -0
- sqlseed-0.1.0/src/sqlseed/core/__init__.py +14 -0
- sqlseed-0.1.0/src/sqlseed/core/column_dag.py +108 -0
- sqlseed-0.1.0/src/sqlseed/core/constraints.py +116 -0
- sqlseed-0.1.0/src/sqlseed/core/expression.py +71 -0
- sqlseed-0.1.0/src/sqlseed/core/mapper.py +257 -0
- sqlseed-0.1.0/src/sqlseed/core/orchestrator.py +578 -0
- sqlseed-0.1.0/src/sqlseed/core/relation.py +124 -0
- sqlseed-0.1.0/src/sqlseed/core/result.py +23 -0
- sqlseed-0.1.0/src/sqlseed/core/schema.py +97 -0
- sqlseed-0.1.0/src/sqlseed/core/transform.py +27 -0
- sqlseed-0.1.0/src/sqlseed/database/__init__.py +14 -0
- sqlseed-0.1.0/src/sqlseed/database/_protocol.py +72 -0
- sqlseed-0.1.0/src/sqlseed/database/optimizer.py +96 -0
- sqlseed-0.1.0/src/sqlseed/database/raw_sqlite_adapter.py +197 -0
- sqlseed-0.1.0/src/sqlseed/database/sqlite_utils_adapter.py +183 -0
- sqlseed-0.1.0/src/sqlseed/generators/__init__.py +11 -0
- sqlseed-0.1.0/src/sqlseed/generators/_protocol.py +73 -0
- sqlseed-0.1.0/src/sqlseed/generators/base_provider.py +448 -0
- sqlseed-0.1.0/src/sqlseed/generators/faker_provider.py +157 -0
- sqlseed-0.1.0/src/sqlseed/generators/mimesis_provider.py +203 -0
- sqlseed-0.1.0/src/sqlseed/generators/registry.py +86 -0
- sqlseed-0.1.0/src/sqlseed/generators/stream.py +157 -0
- sqlseed-0.1.0/src/sqlseed/py.typed +0 -0
sqlseed-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
|
|
23
|
+
# Virtual Environment
|
|
24
|
+
.venv/
|
|
25
|
+
env/
|
|
26
|
+
ENV/
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.vscode/
|
|
30
|
+
.idea/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
*~
|
|
34
|
+
|
|
35
|
+
# Testing
|
|
36
|
+
.pytest_cache/
|
|
37
|
+
.coverage
|
|
38
|
+
htmlcov/
|
|
39
|
+
|
|
40
|
+
# Type checking
|
|
41
|
+
.mypy_cache/
|
|
42
|
+
.dmypy.json
|
|
43
|
+
dmypy.json
|
|
44
|
+
|
|
45
|
+
# Linting
|
|
46
|
+
.ruff_cache/
|
|
47
|
+
|
|
48
|
+
# Project specific
|
|
49
|
+
*.db
|
|
50
|
+
*.sqlite
|
|
51
|
+
*.sqlite3
|
|
52
|
+
snapshots/
|
|
53
|
+
|
|
54
|
+
# AI cache
|
|
55
|
+
.sqlseed_cache/
|
|
56
|
+
|
|
57
|
+
# Archived temp files
|
|
58
|
+
_archived_temp/
|
|
59
|
+
|
|
60
|
+
# macOS
|
|
61
|
+
.DS_Store
|
|
62
|
+
|
|
63
|
+
# Trae IDE
|
|
64
|
+
.trae/
|
|
65
|
+
|
|
66
|
+
# Build artifacts
|
|
67
|
+
dist/
|
|
68
|
+
*.whl
|
|
69
|
+
*.tar.gz
|
|
sqlseed-0.1.0/CHANGELOG.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# 变更日志
|
|
2
|
+
|
|
3
|
+
本项目所有重要变更将记录在此文件中。
|
|
4
|
+
|
|
5
|
+
格式基于 [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
本项目遵循[语义化版本](https://semver.org/spec/v2.0.0.html)。
|
|
7
|
+
|
|
8
|
+
## [未发布]
|
|
9
|
+
|
|
10
|
+
### 新增
|
|
11
|
+
|
|
12
|
+
#### 核心引擎
|
|
13
|
+
- 核心编排引擎 `DataOrchestrator`,支持流式批量生成
|
|
14
|
+
- `ColumnMapper` 8 级策略链(包括精确匹配 → 模式匹配 → 类型回退 → 默认等环节)
|
|
15
|
+
- `DatabaseAdapter` Protocol,含 `SQLiteUtilsAdapter` 和 `RawSQLiteAdapter`
|
|
16
|
+
- `PragmaOptimizer` 三级优化(LIGHT / MODERATE / AGGRESSIVE)
|
|
17
|
+
- `DataProvider` Protocol,含 `BaseProvider`、`FakerProvider`、`MimesisProvider`
|
|
18
|
+
- `DataStream` 流式数据生成器,内存高效的批量处理
|
|
19
|
+
- `RelationResolver` 外键依赖拓扑排序
|
|
20
|
+
- 基于 `pluggy` 的插件系统,10 个 Hook 点
|
|
21
|
+
- CLI 命令:`fill`、`preview`、`inspect`、`init`、`replay`、`ai-suggest`
|
|
22
|
+
- Python API:`sqlseed.fill()`、`sqlseed.connect()`、`sqlseed.fill_from_config()`、`sqlseed.preview()`
|
|
23
|
+
- YAML/JSON 配置文件支持
|
|
24
|
+
- 配置快照保存与回放
|
|
25
|
+
- SQL 注入防护(`quote_identifier()` 工具)
|
|
26
|
+
|
|
27
|
+
#### v2.0 — 列 DAG 与表达式引擎
|
|
28
|
+
- `ColumnDAG` 列依赖解析,基于拓扑排序
|
|
29
|
+
- `ExpressionEngine` 基于 `simpleeval` 的安全表达式求值,带基于线程的超时保护
|
|
30
|
+
- `ConstraintSolver` 唯一性约束求解,支持重试和回溯
|
|
31
|
+
- `TransformLoader` 用户 Python 脚本动态加载(`importlib`)
|
|
32
|
+
- `SharedPool` 跨表值共享,维持引用完整性
|
|
33
|
+
- `IndexInfo` 数据类和 `get_index_info()` 加入 `DatabaseAdapter` Protocol
|
|
34
|
+
- `get_sample_rows()` 方法加入 `DatabaseAdapter` Protocol,用于上下文嗅探
|
|
35
|
+
- `sqlseed_ai_analyze_table` Hook(firstresult),AI 驱动的 Schema 分析
|
|
36
|
+
- `sqlseed_shared_pool_loaded` Hook,跨表关联追踪
|
|
37
|
+
|
|
38
|
+
#### AI 插件(sqlseed-ai)
|
|
39
|
+
- `SchemaAnalyzer` LLM 集成(OpenAI 兼容 API)
|
|
40
|
+
- 上下文嗅探:提取列、索引、样本数据、外键供 LLM 分析
|
|
41
|
+
- `AIConfig` 可配置模型、API Key 和 Base URL
|
|
42
|
+
- 默认模型:`qwen3-coder-plus`,支持环境变量覆盖
|
|
43
|
+
- CLI `ai-suggest` 命令,AI 驱动的 YAML 生成
|
|
44
|
+
- 自然语言配置(`nl_config.py`)
|
|
45
|
+
|
|
46
|
+
#### AI Evolution — 智能增强
|
|
47
|
+
- 结构化输出迁移:YAML → JSON,`response_format` 强制 JSON 输出
|
|
48
|
+
- 自纠正闭环:`AiConfigRefiner` 自动检测并修复无效配置,支持最多 3 轮重试
|
|
49
|
+
- 错误摘要系统:`errors.py` 智能分类错误(未知生成器、Pydantic 验证、表达式超时等)
|
|
50
|
+
- 数据分布增强:`profile_column_distribution()` 分析列数据分布,注入 LLM 上下文
|
|
51
|
+
- Few-shot 示例库:4 个典型场景示例(用户表、银行卡表、订单表、员工表)
|
|
52
|
+
- 文件缓存:带 schema hash 校验的配置缓存,`--no-cache` 标志跳过缓存
|
|
53
|
+
- 预计算模板池:`sqlseed_pre_generate_templates` Hook,AI 为复杂列预生成值
|
|
54
|
+
- MCP 增强:Schema Resource、schema_hash 工具返回值
|
|
55
|
+
|
|
56
|
+
#### 架构优化
|
|
57
|
+
- 约束求解器回溯机制:`RegisterResult` + `try_register()`,派生列 UNIQUE 约束失败时回溯源列
|
|
58
|
+
- Refiner 解耦:`get_column_names()` + `get_skippable_columns()` 公开接口,不再访问私有属性
|
|
59
|
+
- 语义化异常:`UnknownGeneratorError` 替代脆弱的字符串匹配
|
|
60
|
+
- AI 建议扩展:支持 `integer`、`date`、`datetime`、`choice` 类型列
|
|
61
|
+
- 词边界列匹配:`_is_simple_column()` 使用正则词边界替代子串匹配
|
|
62
|
+
- 复合唯一约束:`check_composite()` + `unregister_composite()`
|
|
63
|
+
- 大数据集优化:`probabilistic=True` 启用 hash-based 去重,降低内存占用
|
|
64
|
+
|
|
65
|
+
#### MCP 服务器(mcp-server-sqlseed)
|
|
66
|
+
- `sqlseed_inspect_schema` 工具 — 检查数据库 Schema(列、外键、索引、样本数据)
|
|
67
|
+
- `sqlseed_generate_yaml` 工具 — AI 驱动的 YAML 配置生成
|
|
68
|
+
- `sqlseed_execute_fill` 工具 — 执行数据生成(支持 YAML 配置)
|
|
69
|
+
- 基于 FastMCP 的服务器,支持 `python -m mcp_server_sqlseed`
|
|
70
|
+
|
|
71
|
+
### 修复
|
|
72
|
+
- Hook `firstresult` 语义与设计文档对齐(`transform_row` 和 `transform_batch`)
|
|
73
|
+
- `validate_table_name` 增加正则验证和适当警告
|
|
74
|
+
- 移除编排器中冗余的种子双重设置
|
|
75
|
+
- 将重复的 `_is_autoincrement` 逻辑提取到共享的 `schema_helpers` 工具
|
|
76
|
+
- 为 `DataOrchestrator` 添加 `fill()` 别名,以兼容设计文档中的 API
|
|
77
|
+
- CLI `fill` 命令使用 `--config` 时 `db_path` 改为可选
|
|
78
|
+
- 表达式引擎增加超时保护(默认 5 秒),防止无限循环
|
|
79
|
+
- 修复流式生成器集成 `rstr` 时出现的 `random.seed()` 类型异常
|
|
80
|
+
- `fill_from_config` 中 transform 属性正确传递到内部编排器
|
sqlseed-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
GNU AFFERO GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 19 November 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2026 sqlseed Authors
|
|
5
|
+
|
|
6
|
+
This program is free software: you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU Affero General Public License as published by
|
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
This program is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU Affero General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU Affero General Public License
|
|
17
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|