sqlseed 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. sqlseed/__init__.py +121 -0
  2. sqlseed/_utils/__init__.py +11 -0
  3. sqlseed/_utils/logger.py +30 -0
  4. sqlseed/_utils/metrics.py +45 -0
  5. sqlseed/_utils/progress.py +14 -0
  6. sqlseed/_utils/schema_helpers.py +51 -0
  7. sqlseed/_utils/sql_safe.py +45 -0
  8. sqlseed/_version.py +1 -0
  9. sqlseed/cli/__init__.py +3 -0
  10. sqlseed/cli/main.py +316 -0
  11. sqlseed/config/__init__.py +14 -0
  12. sqlseed/config/loader.py +66 -0
  13. sqlseed/config/models.py +99 -0
  14. sqlseed/config/snapshot.py +91 -0
  15. sqlseed/core/__init__.py +14 -0
  16. sqlseed/core/column_dag.py +108 -0
  17. sqlseed/core/constraints.py +116 -0
  18. sqlseed/core/expression.py +71 -0
  19. sqlseed/core/mapper.py +257 -0
  20. sqlseed/core/orchestrator.py +578 -0
  21. sqlseed/core/relation.py +124 -0
  22. sqlseed/core/result.py +23 -0
  23. sqlseed/core/schema.py +97 -0
  24. sqlseed/core/transform.py +27 -0
  25. sqlseed/database/__init__.py +14 -0
  26. sqlseed/database/_protocol.py +72 -0
  27. sqlseed/database/optimizer.py +96 -0
  28. sqlseed/database/raw_sqlite_adapter.py +197 -0
  29. sqlseed/database/sqlite_utils_adapter.py +183 -0
  30. sqlseed/generators/__init__.py +11 -0
  31. sqlseed/generators/_protocol.py +73 -0
  32. sqlseed/generators/base_provider.py +448 -0
  33. sqlseed/generators/faker_provider.py +157 -0
  34. sqlseed/generators/mimesis_provider.py +203 -0
  35. sqlseed/generators/registry.py +86 -0
  36. sqlseed/generators/stream.py +157 -0
  37. sqlseed/py.typed +0 -0
  38. sqlseed-0.1.0.dist-info/METADATA +934 -0
  39. sqlseed-0.1.0.dist-info/RECORD +42 -0
  40. sqlseed-0.1.0.dist-info/WHEEL +4 -0
  41. sqlseed-0.1.0.dist-info/entry_points.txt +6 -0
  42. sqlseed-0.1.0.dist-info/licenses/LICENSE +17 -0
sqlseed/core/mapper.py ADDED
@@ -0,0 +1,257 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Any, ClassVar
6
+
7
+ if TYPE_CHECKING:
8
+ from sqlseed.database._protocol import ColumnInfo
9
+
10
+
11
@dataclass
class GeneratorSpec:
    """Describe which generator fills a column and how it is configured.

    Attributes are plain data; this class performs no validation.
    """

    # Name of the generator to use ("skip" is returned by ColumnMapper when it
    # decides not to generate a value for the column).
    generator_name: str
    # Keyword parameters forwarded to the generator.
    params: dict[str, Any] = field(default_factory=dict)
    # Fraction of NULL values requested for the column (0.0 by default).
    null_ratio: float = 0.0
    # Optional provider name taken from the user configuration.
    provider: str | None = None


class ColumnMapper:
    """Choose a :class:`GeneratorSpec` for a column from its name and type.

    ``map_column`` resolves a column in this order, returning on the first hit:

    1. primary key that is autoincrement or has an integer-like type -> ``skip``
    2. explicit user configuration carrying a ``generator``
    3. custom exact-name rules (``register_exact_rule``)
    4. built-in exact-name rules (``EXACT_MATCH_RULES``)
    5. custom regex rules (``register_pattern_rule``)
    6. built-in regex rules (``PATTERN_MATCH_RULES``)
    7. columns with a default or that are nullable -> ``skip``
    8. declared-type fallback (``TYPE_FALLBACK_RULES``)

    Column names are lower-cased before matching, so regex rules are matched
    against the lower-cased name.
    """

    # Exact (lower-cased) column name -> generator name.
    EXACT_MATCH_RULES: ClassVar[dict[str, str]] = {
        "email": "email",
        "phone": "phone",
        "telephone": "phone",
        "mobile": "phone",
        "address": "address",
        "name": "name",
        "username": "name",
        "user_name": "name",
        "nickname": "name",
        "first_name": "first_name",
        "last_name": "last_name",
        "full_name": "name",
        "company": "company",
        "organization": "company",
        "ip": "ipv4",
        "ip_address": "ipv4",
        "url": "url",
        "website": "url",
        "homepage": "url",
        "avatar": "url",
        "avatar_url": "url",
        "uuid": "uuid",
        "guid": "uuid",
        "token": "uuid",
        "password": "password",
        "passwd": "password",
        "secret": "password",
        "status": "choice",
        "state": "choice",
        "gender": "choice",
        "sex": "choice",
        "type": "choice",
        "level": "choice",
        "priority": "choice",
        "role": "choice",
        "age": "integer",
        "count": "integer",
        "quantity": "integer",
        "amount": "float",
        "price": "float",
        "cost": "float",
        "salary": "float",
        "balance": "float",
        "score": "float",
        "rating": "float",
        "weight": "float",
        "height": "float",
        "title": "sentence",
        "subject": "sentence",
        "headline": "sentence",
        "bio": "text",
        "biography": "text",
        "description": "text",
        "summary": "text",
        "content": "text",
        "body": "text",
        "comment": "text",
        "note": "text",
        "remark": "text",
        "latitude": "float",
        "longitude": "float",
        "lat": "float",
        "lng": "float",
        "city": "sentence",
        "country": "sentence",
        "zip_code": "string",
        "postal_code": "string",
    }

    # Default generator parameters for some of the exact-name rules above.
    EXACT_MATCH_PARAMS: ClassVar[dict[str, dict[str, Any]]] = {
        "age": {"min_value": 18, "max_value": 100},
        "count": {"min_value": 0, "max_value": 10000},
        "quantity": {"min_value": 1, "max_value": 100},
        "amount": {"min_value": 0.01, "max_value": 99999.99, "precision": 2},
        "price": {"min_value": 0.01, "max_value": 9999.99, "precision": 2},
        "cost": {"min_value": 0.01, "max_value": 9999.99, "precision": 2},
        "salary": {"min_value": 3000.0, "max_value": 100000.0, "precision": 2},
        "balance": {"min_value": 0.0, "max_value": 999999.99, "precision": 2},
        "score": {"min_value": 0.0, "max_value": 100.0, "precision": 1},
        "rating": {"min_value": 1.0, "max_value": 5.0, "precision": 1},
        "weight": {"min_value": 0.1, "max_value": 500.0, "precision": 1},
        "height": {"min_value": 50.0, "max_value": 250.0, "precision": 1},
        "latitude": {"min_value": -90.0, "max_value": 90.0, "precision": 6},
        "longitude": {"min_value": -180.0, "max_value": 180.0, "precision": 6},
        "lat": {"min_value": -90.0, "max_value": 90.0, "precision": 6},
        "lng": {"min_value": -180.0, "max_value": 180.0, "precision": 6},
        "status": {"choices": [0, 1]},
        "state": {"choices": [0, 1, 2]},
        "gender": {"choices": ["male", "female", "other"]},
        "sex": {"choices": ["male", "female"]},
        "type": {"choices": [1, 2, 3]},
        "level": {"choices": [1, 2, 3, 4, 5]},
        "priority": {"choices": ["low", "medium", "high"]},
        "role": {"choices": ["admin", "user", "guest"]},
        "bio": {"min_length": 50, "max_length": 200},
        "description": {"min_length": 100, "max_length": 500},
        "content": {"min_length": 200, "max_length": 1000},
        "comment": {"min_length": 10, "max_length": 200},
    }

    # (regex, generator name, default params); tried in order with re.match.
    PATTERN_MATCH_RULES: ClassVar[list[tuple[str, str, dict[str, Any]]]] = [
        (r"^id$", "autoincrement", {}),
        (r".*_id$", "foreign_key_or_integer", {}),
        (r".*_ids$", "json", {}),
        (r".*_at$", "datetime", {}),
        (r".*_date$", "date", {}),
        (r".*_time$", "datetime", {}),
        (r".*_timestamp$", "timestamp", {}),
        (r"^created$", "datetime", {}),
        (r"^updated$", "datetime", {}),
        (r"^deleted$", "datetime", {}),
        (r".*_count$|.*_num$|.*_number$", "integer", {"min_value": 0, "max_value": 10000}),
        (r".*_amount$|.*_price$|.*_cost$|.*_fee$", "float", {"min_value": 0.01, "max_value": 99999.99, "precision": 2}),
        (r".*_rate$|.*_ratio$|.*_percent$", "float", {"min_value": 0.0, "max_value": 1.0, "precision": 4}),
        (r"^is_.*|^has_.*|^can_.*|^should_.*|^enable.*|^disable.*", "boolean", {}),
        (r".*_code$", "string", {"min_length": 6, "max_length": 12, "charset": "alphanumeric"}),
        (r".*_name$", "name", {}),
        (r".*_email$", "email", {}),
        (r".*_phone$|.*_tel$|.*_mobile$", "phone", {}),
        (r".*_url$|.*_link$|.*_href$", "url", {}),
        (r".*_path$|.*_file$", "string", {"min_length": 10, "max_length": 100}),
        (r".*_key$|.*_token$|.*_hash$", "uuid", {}),
        (r".*_password$|.*_passwd$|.*_secret$", "password", {}),
        (r".*_address$", "address", {}),
        (r".*_description$|.*_desc$|.*_text$|.*_content$|.*_body$", "text", {"min_length": 50, "max_length": 300}),
        (r".*_title$|.*_subject$|.*_headline$", "sentence", {}),
    ]

    # Declared-type prefix -> (generator name, default params).
    TYPE_FALLBACK_RULES: ClassVar[dict[str, tuple[str, dict[str, Any]]]] = {
        "INTEGER": ("integer", {"min_value": 0, "max_value": 999999}),
        "INT8": ("integer", {"min_value": 0, "max_value": 255}),
        "INT16": ("integer", {"min_value": 0, "max_value": 65535}),
        "INT32": ("integer", {"min_value": 0, "max_value": 2147483647}),
        "INT64": ("integer", {"min_value": 0, "max_value": 999999999}),
        "INT": ("integer", {"min_value": 0, "max_value": 999999}),
        "TINYINT": ("integer", {"min_value": 0, "max_value": 255}),
        "SMALLINT": ("integer", {"min_value": 0, "max_value": 32767}),
        "BIGINT": ("integer", {"min_value": 0, "max_value": 999999999}),
        "REAL": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "FLOAT": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "DOUBLE": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "DECIMAL": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "NUMERIC": ("float", {"min_value": 0.0, "max_value": 999999.0}),
        "TEXT": ("string", {"min_length": 5, "max_length": 50}),
        "BLOB": ("bytes", {"length": 32}),
        "BOOLEAN": ("boolean", {}),
        "DATE": ("date", {}),
        "DATETIME": ("datetime", {}),
        "TIMESTAMP": ("timestamp", {}),
        "VARCHAR": ("string", {}),
        "CHAR": ("string", {}),
    }

    def __init__(self) -> None:
        """Create a mapper with no custom rules registered."""
        self._custom_exact_rules: dict[str, tuple[str, dict[str, Any]]] = {}
        self._custom_pattern_rules: list[tuple[str, str, dict[str, Any]]] = []

    def register_exact_rule(self, column_name: str, generator: str, params: dict[str, Any] | None = None) -> None:
        """Register (or replace) a rule for one exact column name.

        The name is stored lower-cased, so matching is case-insensitive.
        ``params`` is shallow-copied so later changes to the caller's dict do
        not alter the registered rule.
        """
        self._custom_exact_rules[column_name.lower()] = (generator, dict(params) if params else {})

    def register_pattern_rule(self, pattern: str, generator: str, params: dict[str, Any] | None = None) -> None:
        """Append a regex rule; custom rules are tried in registration order.

        ``pattern`` is applied with ``re.match`` to the lower-cased column
        name. ``params`` is shallow-copied for the same reason as in
        ``register_exact_rule``.
        """
        self._custom_pattern_rules.append((pattern, generator, dict(params) if params else {}))

    def map_column(self, column_info: ColumnInfo, user_config: Any = None) -> GeneratorSpec:
        """Return the generator spec for a single column.

        Args:
            column_info: Column description; this method reads ``name``,
                ``type``, ``is_primary_key``, ``is_autoincrement``,
                ``default`` and ``nullable``.
            user_config: Optional per-column configuration. It is honoured
                only when it has a truthy ``generator`` attribute; ``params``,
                ``null_ratio`` and ``provider`` are read when present.

        Returns:
            The first spec produced by the resolution order described on the
            class. Parameter dicts taken from rule tables are shallow-copied,
            so mutating ``spec.params`` never changes a rule.
        """
        column_name = column_info.name.lower()
        column_type = column_info.type.upper() if column_info.type else "TEXT"

        # "INT" is a substring of "INTEGER", so one containment test covers both.
        if column_info.is_primary_key and (column_info.is_autoincrement or "INT" in column_type):
            return GeneratorSpec(generator_name="skip")

        if user_config and getattr(user_config, "generator", None):
            provider = getattr(user_config, "provider", None)
            return GeneratorSpec(
                generator_name=user_config.generator,
                params=getattr(user_config, "params", {}),
                null_ratio=getattr(user_config, "null_ratio", 0.0),
                # The provider is expected to be enum-like (exposes ``.value``).
                provider=provider.value if provider else None,
            )

        custom_exact = self._custom_exact_rules.get(column_name)
        if custom_exact is not None:
            gen, params = custom_exact
            return GeneratorSpec(generator_name=gen, params=dict(params))

        if column_name in self.EXACT_MATCH_RULES:
            return GeneratorSpec(
                generator_name=self.EXACT_MATCH_RULES[column_name],
                params=dict(self.EXACT_MATCH_PARAMS.get(column_name, {})),
            )

        # Custom patterns take precedence over the built-in ones.
        for rules in (self._custom_pattern_rules, self.PATTERN_MATCH_RULES):
            for pattern, gen, params in rules:
                if re.match(pattern, column_name):
                    return GeneratorSpec(generator_name=gen, params=dict(params))

        if column_info.default is not None or column_info.nullable:
            return GeneratorSpec(generator_name="skip")

        return self._type_faithful_fallback(column_type)

    def _type_faithful_fallback(self, column_type: str) -> GeneratorSpec:
        """Pick a generator from the declared column type alone.

        A single ``(N)`` length suffix, e.g. ``VARCHAR(20)``, overrides the
        length parameters of ``string`` and ``bytes`` generators. Types that
        match no entry of ``TYPE_FALLBACK_RULES`` get a 5-50 character string.
        """
        length_match = re.search(r"\((\d+)\)", column_type)
        max_length = int(length_match.group(1)) if length_match else None

        base_type = re.sub(r"\(.*\)", "", column_type).strip()

        # Try the longest prefix first so that e.g. "DATETIME" is not captured
        # by the shorter "DATE" entry. sorted() is stable, so prefixes of equal
        # length keep their declaration order.
        candidates = sorted(
            self.TYPE_FALLBACK_RULES.items(),
            key=lambda item: len(item[0]),
            reverse=True,
        )
        for type_prefix, (gen, default_params) in candidates:
            if not base_type.startswith(type_prefix):
                continue
            params = dict(default_params)
            if max_length is not None:
                if gen == "string":
                    params["min_length"] = 1
                    params["max_length"] = max_length
                elif gen == "bytes":
                    params["length"] = max_length
            return GeneratorSpec(generator_name=gen, params=params)

        return GeneratorSpec(generator_name="string", params={"min_length": 5, "max_length": 50})

    def map_columns(
        self,
        columns: list[ColumnInfo],
        user_configs: dict[str, Any] | None = None,
    ) -> dict[str, GeneratorSpec]:
        """Map every column, keyed by its original (not lower-cased) name.

        ``user_configs`` is looked up by the column's original name; columns
        without an entry are mapped with no user configuration.
        """
        configs = user_configs or {}
        return {col.name: self.map_column(col, configs.get(col.name)) for col in columns}