sqlseed 0.1.11__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. {sqlseed-0.1.11 → sqlseed-0.1.12}/CHANGELOG.md +2 -1
  2. {sqlseed-0.1.11 → sqlseed-0.1.12}/PKG-INFO +40 -26
  3. {sqlseed-0.1.11 → sqlseed-0.1.12}/README.md +39 -25
  4. sqlseed-0.1.12/src/sqlseed/_version.py +8 -0
  5. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/cli/main.py +87 -27
  6. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/mapper.py +18 -0
  7. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/orchestrator.py +2 -0
  8. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/raw_sqlite_adapter.py +1 -0
  9. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/sqlite_utils_adapter.py +1 -0
  10. sqlseed-0.1.11/src/sqlseed/_version.py +0 -3
  11. {sqlseed-0.1.11 → sqlseed-0.1.12}/.gitignore +0 -0
  12. {sqlseed-0.1.11 → sqlseed-0.1.12}/LICENSE +0 -0
  13. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/mcp-server-sqlseed/README.md +0 -0
  14. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/mcp-server-sqlseed/pyproject.toml +0 -0
  15. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/mcp-server-sqlseed/src/mcp_server_sqlseed/__init__.py +0 -0
  16. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/mcp-server-sqlseed/src/mcp_server_sqlseed/__main__.py +0 -0
  17. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/mcp-server-sqlseed/src/mcp_server_sqlseed/config.py +0 -0
  18. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/mcp-server-sqlseed/src/mcp_server_sqlseed/server.py +0 -0
  19. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/README.md +0 -0
  20. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/pyproject.toml +0 -0
  21. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/__init__.py +0 -0
  22. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/_client.py +0 -0
  23. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/_json_utils.py +0 -0
  24. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/analyzer.py +0 -0
  25. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/config.py +0 -0
  26. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/errors.py +0 -0
  27. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/examples.py +0 -0
  28. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/provider.py +0 -0
  29. {sqlseed-0.1.11 → sqlseed-0.1.12}/plugins/sqlseed-ai/src/sqlseed_ai/refiner.py +0 -0
  30. {sqlseed-0.1.11 → sqlseed-0.1.12}/pyproject.toml +0 -0
  31. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/__init__.py +0 -0
  32. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/_utils/__init__.py +0 -0
  33. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/_utils/logger.py +0 -0
  34. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/_utils/metrics.py +0 -0
  35. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/_utils/progress.py +0 -0
  36. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/_utils/schema_helpers.py +0 -0
  37. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/_utils/sql_safe.py +0 -0
  38. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/cli/__init__.py +0 -0
  39. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/config/__init__.py +0 -0
  40. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/config/loader.py +0 -0
  41. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/config/models.py +0 -0
  42. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/config/snapshot.py +0 -0
  43. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/__init__.py +0 -0
  44. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/column_dag.py +0 -0
  45. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/constraints.py +0 -0
  46. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/enrichment.py +0 -0
  47. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/expression.py +0 -0
  48. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/plugin_mediator.py +0 -0
  49. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/relation.py +0 -0
  50. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/result.py +0 -0
  51. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/schema.py +0 -0
  52. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/transform.py +0 -0
  53. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/core/unique_adjuster.py +0 -0
  54. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/__init__.py +0 -0
  55. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/_base_adapter.py +0 -0
  56. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/_compat.py +0 -0
  57. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/_helpers.py +0 -0
  58. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/_protocol.py +0 -0
  59. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/database/optimizer.py +0 -0
  60. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/__init__.py +0 -0
  61. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/_dispatch.py +0 -0
  62. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/_json_helpers.py +0 -0
  63. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/_protocol.py +0 -0
  64. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/_string_helpers.py +0 -0
  65. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/base_provider.py +0 -0
  66. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/faker_provider.py +0 -0
  67. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/mimesis_provider.py +0 -0
  68. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/registry.py +0 -0
  69. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/generators/stream.py +0 -0
  70. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/plugins/__init__.py +0 -0
  71. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/plugins/hookspecs.py +0 -0
  72. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/plugins/manager.py +0 -0
  73. {sqlseed-0.1.11 → sqlseed-0.1.12}/src/sqlseed/py.typed +0 -0
@@ -17,7 +17,7 @@
17
17
  - `DataProvider` Protocol,含 `BaseProvider`、`FakerProvider`、`MimesisProvider`
18
18
  - `DataStream` 流式数据生成器,内存高效的批量处理
19
19
  - `RelationResolver` 外键依赖拓扑排序
20
- - 基于 `pluggy` 的插件系统,10 个 Hook 点
20
+ - 基于 `pluggy` 的插件系统,11 个 Hook 点
21
21
  - CLI 命令:`fill`、`preview`、`inspect`、`init`、`replay`、`ai-suggest`
22
22
  - Python API:`sqlseed.fill()`、`sqlseed.connect()`、`sqlseed.fill_from_config()`、`sqlseed.preview()`
23
23
  - YAML/JSON 配置文件支持
@@ -81,5 +81,6 @@
81
81
  - `ProviderRegistry.register_from_entry_points()` 修正 provider 类与普通插件入口点的区分逻辑,非 provider 入口点(如 `sqlseed_ai:plugin`)不再产生误报 warning
82
82
 
83
83
  ### 变更
84
+ - README / 开发文档与当前实现对齐:MCP tool 名称、Provider 协议示例、`sqlseed_shared_pool_loaded` 触发时机与 `UnknownGeneratorError` 行为说明已同步
84
85
  - 移除 `suggest.py`(`ColumnSuggester`)和 `nl_config.py`(`NLConfigGenerator`),其功能由 `SchemaAnalyzer` + `AiConfigRefiner` 完全替代。如有外部代码直接 `from sqlseed_ai.suggest import ColumnSuggester` 或 `from sqlseed_ai.nl_config import NLConfigGenerator`,将产生 `ImportError`
85
86
  - `plugins/sqlseed-ai/README.md` 功能描述与当前实际入口对齐,移除未对外提供的 "Column-level Suggestions" / "Natural Language Config" 描述
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlseed
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: Declarative SQLite test data generation toolkit
5
5
  Project-URL: Homepage, https://github.com/sunbos/sqlseed
6
6
  Project-URL: Documentation, https://github.com/sunbos/sqlseed#readme
@@ -89,7 +89,7 @@ print(result)
89
89
  | 零配置智能生成 | ✅ | ❌ | ❌ |
90
90
  | 外键自动维护 | ✅ | 手动 | 手动 |
91
91
  | 10 万行 + 数据 | ✅ 流式 | ⚠️ OOM | ❌ |
92
- | 列语义推断 | ✅ 8 级策略 | ❌ | ❌ |
92
+ | 列语义推断 | ✅ 9 级策略 | ❌ | ❌ |
93
93
  | 可重复生成 | ✅ seed | ⚠️ 手动 | ✅ |
94
94
  | AI 智能调优 | ✅ LLM | ❌ | ❌ |
95
95
  | 配置热重用 | ✅ YAML | ❌ | ❌ |
@@ -102,7 +102,7 @@ print(result)
102
102
 
103
103
  **🚀 零配置智能生成**
104
104
 
105
- 自动推断数据库 Schema,通过 8 级策略链为每列选择最佳生成器。列名是 `email`?生成邮箱。列名是 `*_at`?生成时间戳。完全不需要配置。
105
+ 自动推断数据库 Schema,通过 9 级策略链为每列选择最佳生成器。列名是 `email`?生成邮箱。列名是 `*_at`?生成时间戳。完全不需要配置。
106
106
 
107
107
  </td>
108
108
  <td width="50%">
@@ -686,7 +686,7 @@ python -m mcp_server_sqlseed
686
686
 
687
687
  > "分析 `app.db` 中 `bank_cards` 表的结构,生成 YAML 配置,然后填充 5000 行数据。"
688
688
 
689
- AI 助手会依次调用 `inspect_schema` → `generate_yaml` → `execute_fill`,无需你手动编写任何代码。
689
+ AI 助手会依次调用 `sqlseed_inspect_schema` → `sqlseed_generate_yaml` → `sqlseed_execute_fill`,无需你手动编写任何代码。
690
690
 
691
691
  ***
692
692
 
@@ -699,6 +699,8 @@ AI 助手会依次调用 `inspect_schema` → `generate_yaml` → `execute_fill`
699
699
  from __future__ import annotations
700
700
  from typing import Any
701
701
 
702
+ from sqlseed.generators import UnknownGeneratorError
703
+
702
704
  class MyCustomProvider:
703
705
  """实现 DataProvider Protocol 即可。不需要继承任何基类。"""
704
706
 
@@ -715,13 +717,19 @@ class MyCustomProvider:
715
717
  def set_seed(self, seed: int) -> None:
716
718
  ...
717
719
 
718
- def generate_string(self, *, min_length: int = 1, max_length: int = 100, charset: str | None = None) -> str:
719
- return "custom_string"
720
+ def generate(self, type_name: str, **params: Any) -> Any:
721
+ if type_name == "string":
722
+ return "custom_string"
723
+ if type_name == "email":
724
+ return "user@example.com"
725
+ raise UnknownGeneratorError(type_name)
720
726
 
721
- # ... 实现其余 generate_* 方法
727
+ # ... 按需处理你要支持的 generator 名称
722
728
  # 完整 Protocol 参见:src/sqlseed/generators/_protocol.py
723
729
  ```
724
730
 
731
+ 如果你想复用内置的 generator name 分发逻辑,而不是手写 `generate()` 的路由,也可以直接继承 `BaseProvider` 后覆盖局部行为。
732
+
725
733
  **注册方式 1:通过** **`pyproject.toml`** **entry-point(推荐)**
726
734
 
727
735
  ```toml
@@ -750,7 +758,7 @@ class MyPlugin:
750
758
  # 📋 数据生成
751
759
  # ═══════════════════════════════════════
752
760
 
753
- # 零配置填充
761
+ # 填充数据(--count 在非 --config 模式下必填)
754
762
  sqlseed fill app.db --table users --count 10000
755
763
 
756
764
  # 完整参数
@@ -762,12 +770,15 @@ sqlseed fill app.db -t users -n 100000 \
762
770
  --clear \
763
771
  --snapshot
764
772
 
765
- # YAML 配置驱动
773
+ # YAML 配置驱动(count 来自配置文件)
766
774
  sqlseed fill --config generate.yaml
767
775
 
768
776
  # Transform 脚本
769
777
  sqlseed fill app.db -t users -n 10000 --transform transform.py
770
778
 
779
+ # 开启 debug 日志
780
+ SQLSEED_LOG_LEVEL=DEBUG sqlseed fill app.db -t users -n 10
781
+
771
782
  # ═══════════════════════════════════════
772
783
  # 🔍 查看与预览
773
784
  # ═══════════════════════════════════════
@@ -802,34 +813,37 @@ sqlseed ai-suggest app.db -t users -o users.yaml --model gpt-4o --verify
802
813
 
803
814
  ***
804
815
 
805
- ## 🧠 8 级智能列映射
816
+ ## 🧠 9 级智能列映射
806
817
 
807
- sqlseed 的核心亮点之一是 `ColumnMapper` 的 8 级策略链。每一列都会按以下优先级尝试匹配:
818
+ sqlseed 的核心亮点之一是 `ColumnMapper` 的 9 级策略链。每一列都会按以下优先级尝试匹配:
808
819
 
809
820
  ```
810
821
  Level 1 │ 用户配置 columns={"email": "email"} 最高优先级
811
822
 
812
823
  Level 2 │ 自定义精确匹配 通过插件 Hook 注册的规则
813
824
 
814
- Level 3 │ 内置精确匹配 67 条规则:email→email, phone→phone, age→integer...
825
+ Level 3 │ 内置精确匹配 68 条规则:email→email, phone→phone, age→integer...
826
+
827
+ Level 4 │ DEFAULT 检查 有默认值 → skip(精确匹配优先于 DEFAULT)
815
828
 
816
- Level 4 │ 自定义模式匹配 通过插件 Hook 注册的正则规则
829
+ Level 5 │ 自定义模式匹配 通过插件 Hook 注册的正则规则
817
830
 
818
- Level 5 │ 内置模式匹配 25 条正则:*_at→datetime, *_id→foreign_key, is_*→boolean...
831
+ Level 6 │ 内置模式匹配 25 条正则:*_at→datetime, *_id→foreign_key, is_*→boolean...
819
832
 
820
- Level 6 │ 跳过 有默认值或可 NULL → skip
833
+ Level 7 │ NULLABLE 回退 可 NULL → skip
821
834
 
822
- Level 7 │ 类型忠实回退 VARCHAR(32)→最长32字符, INT8→0~255, BLOB(1024)→1024字节
835
+ Level 8 │ 类型忠实回退 VARCHAR(32)→最长32字符, INT8→0~255, BLOB(1024)→1024字节
823
836
 
824
- Level 8 │ 默认 string (min=5, max=50)
837
+ Level 9 │ 默认 string (min=5, max=50)
825
838
  ```
826
839
 
827
840
  这意味着:
828
841
 
829
- - 列名 `user_email` → Level 5 模式匹配 `*_email` → `email` 生成器 ✅
830
- - 列名 `is_verified` → Level 5 模式匹配 `is_*` → `boolean` 生成器 ✅
831
- - 列类型 `VARCHAR(20)` → Level 7 类型回退 → 最长 20 字符的字符串 ✅
832
- - 列有 `DEFAULT 1` → Level 6 → 跳过生成 ✅
842
+ - 列名 `user_email` → Level 6 模式匹配 `*_email` → `email` 生成器 ✅
843
+ - 列名 `is_verified` → Level 6 模式匹配 `is_*` → `boolean` 生成器 ✅
844
+ - 列类型 `VARCHAR(20)` → Level 8 类型回退 → 最长 20 字符的字符串 ✅
845
+ - 列有 `DEFAULT 1` → Level 4 → 跳过生成 ✅
846
+ - 列名 `gender` 有 `DEFAULT 'male'` → Level 3 精确匹配 → `choice` 生成器(精确匹配优先于 DEFAULT)✅
833
847
 
834
848
  ***
835
849
 
@@ -849,7 +863,7 @@ sqlseed 通过 [pluggy](https://pluggy.readthedocs.io/) 提供 11 个 Hook 点
849
863
  | `sqlseed_transform_batch` | <br /> | 逐批变换(支持链式处理) |
850
864
  | `sqlseed_before_insert` | <br /> | 每批写入 DB 前 |
851
865
  | `sqlseed_after_insert` | <br /> | 每批写入 DB 后 |
852
- | `sqlseed_shared_pool_loaded` | <br /> | 跨表共享池加载完成 |
866
+ | `sqlseed_shared_pool_loaded` | <br /> | SharedPool 注册后(值池已可读) |
853
867
 
854
868
  ***
855
869
 
@@ -860,7 +874,7 @@ src/sqlseed/
860
874
  ├── __init__.py # 公共 API (fill, connect, fill_from_config, preview)
861
875
  ├── core/ # ===== 核心编排层 =====
862
876
  │ ├── orchestrator.py # DataOrchestrator 主引擎
863
- │ ├── mapper.py # ColumnMapper 8 级策略链
877
+ │ ├── mapper.py # ColumnMapper 9 级策略链
864
878
  │ ├── schema.py # SchemaInferrer — 推断列、索引、数据分布
865
879
  │ ├── relation.py # RelationResolver + SharedPool — FK 与跨表共享
866
880
  │ ├── column_dag.py # ColumnDAG — 列依赖图 + 拓扑排序
@@ -869,12 +883,12 @@ src/sqlseed/
869
883
  │ ├── transform.py # TransformLoader — 用户脚本动态加载
870
884
  │ └── result.py # GenerationResult 数据类
871
885
  ├── generators/ # ===== 数据生成层 =====
872
- │ ├── _protocol.py # DataProvider Protocol (24 个 generate_* 方法)
886
+ │ ├── _protocol.py # DataProvider Protocol + UnknownGeneratorError
873
887
  │ ├── registry.py # ProviderRegistry (entry-point 自动发现)
874
888
  │ ├── base_provider.py # 内置基础生成器(零依赖)
875
889
  │ ├── faker_provider.py # Faker 适配器
876
890
  │ ├── mimesis_provider.py # Mimesis 适配器
877
- │ └── stream.py # DataStream 流式生成 + 约束回溯
891
+ │ └── stream.py # DataStream 流式生成 + 约束回溯 + choice/foreign_key 特判
878
892
  ├── database/ # ===== 数据库层 =====
879
893
  │ ├── _protocol.py # DatabaseAdapter Protocol (ColumnInfo, ForeignKeyInfo, IndexInfo)
880
894
  │ ├── sqlite_utils_adapter.py # 默认适配器
@@ -900,7 +914,7 @@ plugins/
900
914
  ├── sqlseed-ai/ # AI 插件 — LLM 驱动的智能配置
901
915
  │ └── src/sqlseed_ai/ # SchemaAnalyzer, AiConfigRefiner, Few-shot 示例...
902
916
  └── mcp-server-sqlseed/ # MCP 服务器 — AI 助手交互
903
- └── src/mcp_server_sqlseed/ # FastMCP 工具 (inspect/generate_yaml/execute_fill)
917
+ └── src/mcp_server_sqlseed/ # FastMCP 工具 (sqlseed_inspect_schema/sqlseed_generate_yaml/sqlseed_execute_fill)
904
918
  ```
905
919
 
906
920
  ***
@@ -36,7 +36,7 @@ print(result)
36
36
  | 零配置智能生成 | ✅ | ❌ | ❌ |
37
37
  | 外键自动维护 | ✅ | 手动 | 手动 |
38
38
  | 10 万行 + 数据 | ✅ 流式 | ⚠️ OOM | ❌ |
39
- | 列语义推断 | ✅ 8 级策略 | ❌ | ❌ |
39
+ | 列语义推断 | ✅ 9 级策略 | ❌ | ❌ |
40
40
  | 可重复生成 | ✅ seed | ⚠️ 手动 | ✅ |
41
41
  | AI 智能调优 | ✅ LLM | ❌ | ❌ |
42
42
  | 配置热重用 | ✅ YAML | ❌ | ❌ |
@@ -49,7 +49,7 @@ print(result)
49
49
 
50
50
  **🚀 零配置智能生成**
51
51
 
52
- 自动推断数据库 Schema,通过 8 级策略链为每列选择最佳生成器。列名是 `email`?生成邮箱。列名是 `*_at`?生成时间戳。完全不需要配置。
52
+ 自动推断数据库 Schema,通过 9 级策略链为每列选择最佳生成器。列名是 `email`?生成邮箱。列名是 `*_at`?生成时间戳。完全不需要配置。
53
53
 
54
54
  </td>
55
55
  <td width="50%">
@@ -633,7 +633,7 @@ python -m mcp_server_sqlseed
633
633
 
634
634
  > "分析 `app.db` 中 `bank_cards` 表的结构,生成 YAML 配置,然后填充 5000 行数据。"
635
635
 
636
- AI 助手会依次调用 `inspect_schema` → `generate_yaml` → `execute_fill`,无需你手动编写任何代码。
636
+ AI 助手会依次调用 `sqlseed_inspect_schema` → `sqlseed_generate_yaml` → `sqlseed_execute_fill`,无需你手动编写任何代码。
637
637
 
638
638
  ***
639
639
 
@@ -646,6 +646,8 @@ AI 助手会依次调用 `inspect_schema` → `generate_yaml` → `execute_fill`
646
646
  from __future__ import annotations
647
647
  from typing import Any
648
648
 
649
+ from sqlseed.generators import UnknownGeneratorError
650
+
649
651
  class MyCustomProvider:
650
652
  """实现 DataProvider Protocol 即可。不需要继承任何基类。"""
651
653
 
@@ -662,13 +664,19 @@ class MyCustomProvider:
662
664
  def set_seed(self, seed: int) -> None:
663
665
  ...
664
666
 
665
- def generate_string(self, *, min_length: int = 1, max_length: int = 100, charset: str | None = None) -> str:
666
- return "custom_string"
667
+ def generate(self, type_name: str, **params: Any) -> Any:
668
+ if type_name == "string":
669
+ return "custom_string"
670
+ if type_name == "email":
671
+ return "user@example.com"
672
+ raise UnknownGeneratorError(type_name)
667
673
 
668
- # ... 实现其余 generate_* 方法
674
+ # ... 按需处理你要支持的 generator 名称
669
675
  # 完整 Protocol 参见:src/sqlseed/generators/_protocol.py
670
676
  ```
671
677
 
678
+ 如果你想复用内置的 generator name 分发逻辑,而不是手写 `generate()` 的路由,也可以直接继承 `BaseProvider` 后覆盖局部行为。
679
+
672
680
  **注册方式 1:通过** **`pyproject.toml`** **entry-point(推荐)**
673
681
 
674
682
  ```toml
@@ -697,7 +705,7 @@ class MyPlugin:
697
705
  # 📋 数据生成
698
706
  # ═══════════════════════════════════════
699
707
 
700
- # 零配置填充
708
+ # 填充数据(--count 在非 --config 模式下必填)
701
709
  sqlseed fill app.db --table users --count 10000
702
710
 
703
711
  # 完整参数
@@ -709,12 +717,15 @@ sqlseed fill app.db -t users -n 100000 \
709
717
  --clear \
710
718
  --snapshot
711
719
 
712
- # YAML 配置驱动
720
+ # YAML 配置驱动(count 来自配置文件)
713
721
  sqlseed fill --config generate.yaml
714
722
 
715
723
  # Transform 脚本
716
724
  sqlseed fill app.db -t users -n 10000 --transform transform.py
717
725
 
726
+ # 开启 debug 日志
727
+ SQLSEED_LOG_LEVEL=DEBUG sqlseed fill app.db -t users -n 10
728
+
718
729
  # ═══════════════════════════════════════
719
730
  # 🔍 查看与预览
720
731
  # ═══════════════════════════════════════
@@ -749,34 +760,37 @@ sqlseed ai-suggest app.db -t users -o users.yaml --model gpt-4o --verify
749
760
 
750
761
  ***
751
762
 
752
- ## 🧠 8 级智能列映射
763
+ ## 🧠 9 级智能列映射
753
764
 
754
- sqlseed 的核心亮点之一是 `ColumnMapper` 的 8 级策略链。每一列都会按以下优先级尝试匹配:
765
+ sqlseed 的核心亮点之一是 `ColumnMapper` 的 9 级策略链。每一列都会按以下优先级尝试匹配:
755
766
 
756
767
  ```
757
768
  Level 1 │ 用户配置 columns={"email": "email"} 最高优先级
758
769
 
759
770
  Level 2 │ 自定义精确匹配 通过插件 Hook 注册的规则
760
771
 
761
- Level 3 │ 内置精确匹配 67 条规则:email→email, phone→phone, age→integer...
772
+ Level 3 │ 内置精确匹配 68 条规则:email→email, phone→phone, age→integer...
773
+
774
+ Level 4 │ DEFAULT 检查 有默认值 → skip(精确匹配优先于 DEFAULT)
762
775
 
763
- Level 4 │ 自定义模式匹配 通过插件 Hook 注册的正则规则
776
+ Level 5 │ 自定义模式匹配 通过插件 Hook 注册的正则规则
764
777
 
765
- Level 5 │ 内置模式匹配 25 条正则:*_at→datetime, *_id→foreign_key, is_*→boolean...
778
+ Level 6 │ 内置模式匹配 25 条正则:*_at→datetime, *_id→foreign_key, is_*→boolean...
766
779
 
767
- Level 6 │ 跳过 有默认值或可 NULL → skip
780
+ Level 7 │ NULLABLE 回退 可 NULL → skip
768
781
 
769
- Level 7 │ 类型忠实回退 VARCHAR(32)→最长32字符, INT8→0~255, BLOB(1024)→1024字节
782
+ Level 8 │ 类型忠实回退 VARCHAR(32)→最长32字符, INT8→0~255, BLOB(1024)→1024字节
770
783
 
771
- Level 8 │ 默认 string (min=5, max=50)
784
+ Level 9 │ 默认 string (min=5, max=50)
772
785
  ```
773
786
 
774
787
  这意味着:
775
788
 
776
- - 列名 `user_email` → Level 5 模式匹配 `*_email` → `email` 生成器 ✅
777
- - 列名 `is_verified` → Level 5 模式匹配 `is_*` → `boolean` 生成器 ✅
778
- - 列类型 `VARCHAR(20)` → Level 7 类型回退 → 最长 20 字符的字符串 ✅
779
- - 列有 `DEFAULT 1` → Level 6 → 跳过生成 ✅
789
+ - 列名 `user_email` → Level 6 模式匹配 `*_email` → `email` 生成器 ✅
790
+ - 列名 `is_verified` → Level 6 模式匹配 `is_*` → `boolean` 生成器 ✅
791
+ - 列类型 `VARCHAR(20)` → Level 8 类型回退 → 最长 20 字符的字符串 ✅
792
+ - 列有 `DEFAULT 1` → Level 4 → 跳过生成 ✅
793
+ - 列名 `gender` 有 `DEFAULT 'male'` → Level 3 精确匹配 → `choice` 生成器(精确匹配优先于 DEFAULT)✅
780
794
 
781
795
  ***
782
796
 
@@ -796,7 +810,7 @@ sqlseed 通过 [pluggy](https://pluggy.readthedocs.io/) 提供 11 个 Hook 点
796
810
  | `sqlseed_transform_batch` | <br /> | 逐批变换(支持链式处理) |
797
811
  | `sqlseed_before_insert` | <br /> | 每批写入 DB 前 |
798
812
  | `sqlseed_after_insert` | <br /> | 每批写入 DB 后 |
799
- | `sqlseed_shared_pool_loaded` | <br /> | 跨表共享池加载完成 |
813
+ | `sqlseed_shared_pool_loaded` | <br /> | SharedPool 注册后(值池已可读) |
800
814
 
801
815
  ***
802
816
 
@@ -807,7 +821,7 @@ src/sqlseed/
807
821
  ├── __init__.py # 公共 API (fill, connect, fill_from_config, preview)
808
822
  ├── core/ # ===== 核心编排层 =====
809
823
  │ ├── orchestrator.py # DataOrchestrator 主引擎
810
- │ ├── mapper.py # ColumnMapper 8 级策略链
824
+ │ ├── mapper.py # ColumnMapper 9 级策略链
811
825
  │ ├── schema.py # SchemaInferrer — 推断列、索引、数据分布
812
826
  │ ├── relation.py # RelationResolver + SharedPool — FK 与跨表共享
813
827
  │ ├── column_dag.py # ColumnDAG — 列依赖图 + 拓扑排序
@@ -816,12 +830,12 @@ src/sqlseed/
816
830
  │ ├── transform.py # TransformLoader — 用户脚本动态加载
817
831
  │ └── result.py # GenerationResult 数据类
818
832
  ├── generators/ # ===== 数据生成层 =====
819
- │ ├── _protocol.py # DataProvider Protocol (24 个 generate_* 方法)
833
+ │ ├── _protocol.py # DataProvider Protocol + UnknownGeneratorError
820
834
  │ ├── registry.py # ProviderRegistry (entry-point 自动发现)
821
835
  │ ├── base_provider.py # 内置基础生成器(零依赖)
822
836
  │ ├── faker_provider.py # Faker 适配器
823
837
  │ ├── mimesis_provider.py # Mimesis 适配器
824
- │ └── stream.py # DataStream 流式生成 + 约束回溯
838
+ │ └── stream.py # DataStream 流式生成 + 约束回溯 + choice/foreign_key 特判
825
839
  ├── database/ # ===== 数据库层 =====
826
840
  │ ├── _protocol.py # DatabaseAdapter Protocol (ColumnInfo, ForeignKeyInfo, IndexInfo)
827
841
  │ ├── sqlite_utils_adapter.py # 默认适配器
@@ -847,7 +861,7 @@ plugins/
847
861
  ├── sqlseed-ai/ # AI 插件 — LLM 驱动的智能配置
848
862
  │ └── src/sqlseed_ai/ # SchemaAnalyzer, AiConfigRefiner, Few-shot 示例...
849
863
  └── mcp-server-sqlseed/ # MCP 服务器 — AI 助手交互
850
- └── src/mcp_server_sqlseed/ # FastMCP 工具 (inspect/generate_yaml/execute_fill)
864
+ └── src/mcp_server_sqlseed/ # FastMCP 工具 (sqlseed_inspect_schema/sqlseed_generate_yaml/sqlseed_execute_fill)
851
865
  ```
852
866
 
853
867
  ***
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ try:
6
+ __version__ = version("sqlseed")
7
+ except PackageNotFoundError:
8
+ __version__ = "0.0.0+unknown"
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  from typing import Any
4
5
 
5
6
  import click
@@ -10,12 +11,15 @@ from rich.table import Table as RichTable
10
11
  from sqlseed import fill as api_fill
11
12
  from sqlseed import fill_from_config
12
13
  from sqlseed import preview as api_preview
14
+ from sqlseed._utils.logger import configure_logging, get_logger
13
15
  from sqlseed._version import __version__
14
16
  from sqlseed.config.loader import generate_template, save_config
15
17
  from sqlseed.config.models import GeneratorConfig, ProviderType, TableConfig
16
18
  from sqlseed.config.snapshot import SnapshotManager
17
19
  from sqlseed.core.orchestrator import DataOrchestrator
18
20
 
21
+ logger = get_logger(__name__)
22
+
19
23
  try:
20
24
  from sqlseed_ai.analyzer import SchemaAnalyzer
21
25
  from sqlseed_ai.config import AIConfig
@@ -30,6 +34,8 @@ except ImportError:
30
34
  @click.version_option(version=__version__, prog_name="sqlseed")
31
35
  def cli() -> None:
32
36
  """sqlseed - Declarative SQLite test data generation toolkit."""
37
+ log_level = os.environ.get("SQLSEED_LOG_LEVEL", "WARNING").upper()
38
+ configure_logging(log_level)
33
39
 
34
40
 
35
41
  def _fill_from_config_cmd(config_path: str) -> None:
@@ -67,14 +73,34 @@ def _save_snapshot_cmd(
67
73
  click.echo(f"Snapshot saved: {snapshot_path}")
68
74
 
69
75
 
76
+ _FILL_DEFAULT_COUNT = 1000
77
+
78
+
70
79
  @cli.command()
71
80
  @click.argument("db_path", required=False)
72
81
  @click.option("--table", "-t", default=None, help="Target table name")
73
- @click.option("--count", "-n", default=1000, type=int, help="Number of rows to generate")
74
- @click.option("--provider", "-p", default="mimesis", help="Data provider (mimesis|faker|base)")
75
- @click.option("--locale", "-l", default="en_US", help="Locale for data generation")
82
+ @click.option(
83
+ "--count",
84
+ "-n",
85
+ default=None,
86
+ type=int,
87
+ help="Number of rows to generate (required when not using --config)",
88
+ )
89
+ @click.option(
90
+ "--provider",
91
+ "-p",
92
+ default="mimesis",
93
+ help="Data provider: mimesis|faker|base (default: mimesis)",
94
+ )
95
+ @click.option("--locale", "-l", default="en_US", help="Locale for data generation (default: en_US)")
76
96
  @click.option("--seed", "-s", default=None, type=int, help="Random seed for reproducibility")
77
- @click.option("--batch-size", "-b", default=5000, type=int, help="Batch size for insertion")
97
+ @click.option(
98
+ "--batch-size",
99
+ "-b",
100
+ default=5000,
101
+ type=int,
102
+ help="Batch size for insertion (default: 5000)",
103
+ )
78
104
  @click.option("--clear", is_flag=True, help="Clear table before generating")
79
105
  @click.option("--config", "-c", "config_path", default=None, help="YAML/JSON config file path")
80
106
  @click.option("--transform", "transform_path", default=None, help="Python transform script path")
@@ -84,14 +110,31 @@ def fill(**kwargs: Any) -> None:
84
110
  """Fill a table with generated test data.
85
111
 
86
112
  Use --config for config-driven generation, or provide db_path + --table
87
- for direct generation.
113
+ + --count for direct generation.
88
114
  """
115
+ count = kwargs.get("count")
116
+ config_path = kwargs.get("config_path")
117
+
118
+ if count is not None and count <= 0:
119
+ logger.debug("Invalid count value", count=count)
120
+ raise click.UsageError(f"--count must be greater than 0, got {count}")
121
+
122
+ if not config_path and count is None:
123
+ raise click.UsageError(
124
+ "--count is required when not using --config. Use -n <number> to specify the number of rows to generate."
125
+ )
126
+
127
+ if config_path and count is None:
128
+ count = _FILL_DEFAULT_COUNT
129
+
130
+ kwargs["count"] = count
89
131
  _execute_fill(kwargs)
90
132
 
91
133
 
92
134
  def _execute_fill(opts: dict[str, Any]) -> None:
93
135
  config_path = opts.get("config_path")
94
136
  if config_path:
137
+ logger.debug("Using config-driven generation", config_path=config_path)
95
138
  _fill_from_config_cmd(config_path)
96
139
  return
97
140
 
@@ -102,25 +145,32 @@ def _execute_fill(opts: dict[str, Any]) -> None:
102
145
  if not table:
103
146
  raise click.UsageError("--table is required when not using --config")
104
147
 
105
- result = api_fill(
106
- db_path,
107
- table=table,
108
- count=opts.get("count", 1000),
109
- provider=opts.get("provider", "mimesis"),
110
- locale=opts.get("locale", "en_US"),
111
- seed=opts.get("seed"),
112
- batch_size=opts.get("batch_size", 5000),
113
- clear_before=opts.get("clear", False),
114
- enrich=opts.get("enrich", False),
115
- transform=opts.get("transform_path"),
116
- )
148
+ count = opts.get("count", _FILL_DEFAULT_COUNT)
149
+ logger.debug("Starting fill", db_path=db_path, table=table, count=count)
150
+
151
+ try:
152
+ result = api_fill(
153
+ db_path,
154
+ table=table,
155
+ count=count,
156
+ provider=opts.get("provider", "mimesis"),
157
+ locale=opts.get("locale", "en_US"),
158
+ seed=opts.get("seed"),
159
+ batch_size=opts.get("batch_size", 5000),
160
+ clear_before=opts.get("clear", False),
161
+ enrich=opts.get("enrich", False),
162
+ transform=opts.get("transform_path"),
163
+ )
164
+ except ValueError as exc:
165
+ logger.debug("Fill failed with ValueError", error=str(exc))
166
+ raise click.UsageError(str(exc)) from exc
117
167
  click.echo(str(result))
118
168
 
119
169
  if opts.get("snapshot"):
120
170
  _save_snapshot_cmd(
121
171
  db_path,
122
172
  table,
123
- opts.get("count", 1000),
173
+ count,
124
174
  opts.get("provider", "mimesis"),
125
175
  opts.get("locale", "en_US"),
126
176
  opts.get("seed"),
@@ -132,9 +182,14 @@ def _execute_fill(opts: dict[str, Any]) -> None:
132
182
  @cli.command()
133
183
  @click.argument("db_path")
134
184
  @click.option("--table", "-t", required=True, help="Target table name")
135
- @click.option("--count", "-n", default=5, type=int, help="Number of rows to preview")
136
- @click.option("--provider", "-p", default="mimesis", help="Data provider")
137
- @click.option("--locale", "-l", default="en_US", help="Locale")
185
+ @click.option("--count", "-n", default=5, type=int, help="Number of rows to preview (default: 5)")
186
+ @click.option(
187
+ "--provider",
188
+ "-p",
189
+ default="mimesis",
190
+ help="Data provider: mimesis|faker|base (default: mimesis)",
191
+ )
192
+ @click.option("--locale", "-l", default="en_US", help="Locale (default: en_US)")
138
193
  @click.option("--seed", "-s", default=None, type=int, help="Random seed")
139
194
  def preview(
140
195
  db_path: str,
@@ -228,7 +283,7 @@ def inspect(db_path: str, table: str | None, show_mapping: bool) -> None:
228
283
 
229
284
  @cli.command()
230
285
  @click.argument("config_path")
231
- @click.option("--db", default="test.db", help="Database path for template")
286
+ @click.option("--db", default="test.db", help="Database path for template (default: test.db)")
232
287
  def init(config_path: str, db: str) -> None:
233
288
  """Generate a YAML configuration template."""
234
289
  config = generate_template(db)
@@ -274,11 +329,16 @@ def _handle_ai_direct(analyzer: Any, db_path: str, table: str) -> Any:
274
329
  @click.argument("db_path")
275
330
  @click.option("--table", "-t", required=True, help="Target table name")
276
331
  @click.option("--output", "-o", required=True, help="Output YAML file path")
277
- @click.option("--model", "-m", default=None, help="AI model name (default: qwen3-coder-plus)")
278
- @click.option("--api-key", envvar="SQLSEED_AI_API_KEY", default=None, help="AI API key")
279
- @click.option("--base-url", envvar="SQLSEED_AI_BASE_URL", default=None, help="AI API base URL")
280
- @click.option("--max-retries", default=3, type=int, help="Max refinement retries (0=disable)")
281
- @click.option("--verify/--no-verify", default=True, help="Enable AI config self-correction")
332
+ @click.option("--model", "-m", default=None, help="AI model name (default: gpt-4o)")
333
+ @click.option("--api-key", envvar="SQLSEED_AI_API_KEY", default=None, help="AI API key (env: SQLSEED_AI_API_KEY)")
334
+ @click.option(
335
+ "--base-url",
336
+ envvar="SQLSEED_AI_BASE_URL",
337
+ default=None,
338
+ help="AI API base URL (env: SQLSEED_AI_BASE_URL)",
339
+ )
340
+ @click.option("--max-retries", default=3, type=int, help="Max refinement retries, 0=disable (default: 3)")
341
+ @click.option("--verify/--no-verify", default=True, help="Enable AI config self-correction (default: verify)")
282
342
  @click.option("--no-cache", is_flag=True, help="Skip cached AI configs")
283
343
  def ai_suggest(
284
344
  db_path: str,
@@ -231,6 +231,20 @@ class ColumnMapper:
231
231
  return GeneratorSpec(generator_name="skip")
232
232
  return None
233
233
 
234
+ def _map_from_default(
235
+ self, column_info: ColumnInfo, column_type: str, enrich: bool, force_type_infer: bool
236
+ ) -> GeneratorSpec | None:
237
+ if column_info.default is not None:
238
+ if force_type_infer:
239
+ return self._type_faithful_fallback(column_type)
240
+ if enrich:
241
+ return GeneratorSpec(
242
+ generator_name="__enrich__",
243
+ params={"_default": column_info.default, "_nullable": column_info.nullable},
244
+ )
245
+ return GeneratorSpec(generator_name="skip")
246
+ return None
247
+
234
248
  def map_column(
235
249
  self,
236
250
  column_info: ColumnInfo,
@@ -255,6 +269,10 @@ class ColumnMapper:
255
269
  if exact_match:
256
270
  return exact_match
257
271
 
272
+ default_spec = self._map_from_default(column_info, column_type, enrich, force_type_infer)
273
+ if default_spec:
274
+ return default_spec
275
+
258
276
  pattern_match = self._match_pattern(column_name)
259
277
  if pattern_match:
260
278
  return pattern_match
@@ -229,6 +229,8 @@ class DataOrchestrator:
229
229
  ) -> GenerationResult:
230
230
  self._ensure_connected()
231
231
  validate_table_name(table_name)
232
+ if count <= 0:
233
+ raise ValueError(f"count must be greater than 0, got {count}")
232
234
  start_time = time.monotonic()
233
235
  total_inserted = 0
234
236
  batch_count = 0
@@ -127,6 +127,7 @@ class RawSQLiteAdapter(BaseSQLiteAdapter):
127
127
  validate_table_name(table_name)
128
128
  safe_table = quote_identifier(table_name)
129
129
  self.conn.execute(f"DELETE FROM {safe_table}")
130
+ self.conn.execute(f"DELETE FROM sqlite_sequence WHERE name = {safe_table}")
130
131
  self.conn.commit()
131
132
  logger.debug("Cleared table", table_name=table_name)
132
133
 
@@ -134,6 +134,7 @@ class SQLiteUtilsAdapter(BaseSQLiteAdapter):
134
134
  validate_table_name(table_name)
135
135
  safe_table = quote_identifier(table_name)
136
136
  self._db.execute(f"DELETE FROM {safe_table}")
137
+ self._db.execute(f"DELETE FROM sqlite_sequence WHERE name = {safe_table}")
137
138
  logger.debug("Cleared table", table_name=table_name)
138
139
 
139
140
  def _execute_pragma(self, sql: str) -> None:
@@ -1,3 +0,0 @@
1
- from __future__ import annotations
2
-
3
- __version__ = "0.1.0"
File without changes
File without changes
File without changes
File without changes