sqlseed 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlseed/__init__.py +121 -0
- sqlseed/_utils/__init__.py +11 -0
- sqlseed/_utils/logger.py +30 -0
- sqlseed/_utils/metrics.py +45 -0
- sqlseed/_utils/progress.py +14 -0
- sqlseed/_utils/schema_helpers.py +51 -0
- sqlseed/_utils/sql_safe.py +45 -0
- sqlseed/_version.py +1 -0
- sqlseed/cli/__init__.py +3 -0
- sqlseed/cli/main.py +316 -0
- sqlseed/config/__init__.py +14 -0
- sqlseed/config/loader.py +66 -0
- sqlseed/config/models.py +99 -0
- sqlseed/config/snapshot.py +91 -0
- sqlseed/core/__init__.py +14 -0
- sqlseed/core/column_dag.py +108 -0
- sqlseed/core/constraints.py +116 -0
- sqlseed/core/expression.py +71 -0
- sqlseed/core/mapper.py +257 -0
- sqlseed/core/orchestrator.py +578 -0
- sqlseed/core/relation.py +124 -0
- sqlseed/core/result.py +23 -0
- sqlseed/core/schema.py +97 -0
- sqlseed/core/transform.py +27 -0
- sqlseed/database/__init__.py +14 -0
- sqlseed/database/_protocol.py +72 -0
- sqlseed/database/optimizer.py +96 -0
- sqlseed/database/raw_sqlite_adapter.py +197 -0
- sqlseed/database/sqlite_utils_adapter.py +183 -0
- sqlseed/generators/__init__.py +11 -0
- sqlseed/generators/_protocol.py +73 -0
- sqlseed/generators/base_provider.py +448 -0
- sqlseed/generators/faker_provider.py +157 -0
- sqlseed/generators/mimesis_provider.py +203 -0
- sqlseed/generators/registry.py +86 -0
- sqlseed/generators/stream.py +157 -0
- sqlseed/py.typed +0 -0
- sqlseed-0.1.0.dist-info/METADATA +934 -0
- sqlseed-0.1.0.dist-info/RECORD +42 -0
- sqlseed-0.1.0.dist-info/WHEEL +4 -0
- sqlseed-0.1.0.dist-info/entry_points.txt +6 -0
- sqlseed-0.1.0.dist-info/licenses/LICENSE +17 -0
sqlseed/core/mapper.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from sqlseed.database._protocol import ColumnInfo
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class GeneratorSpec:
    """Describe which generator should produce values for one column.

    Instances are plain value objects: the mapper fills them in and hands
    them back to the caller without interpreting the fields any further.
    """

    # Name of the generator to use (e.g. "email", "integer", or "skip").
    generator_name: str

    # Keyword parameters for the generator; each instance gets its own dict.
    params: dict[str, Any] = field(default_factory=dict)

    # Defaults to 0.0.  Presumably the fraction of generated values that
    # should be NULL -- confirm against the code that consumes the spec.
    null_ratio: float = 0.0

    # Optional provider name; None when no provider was specified.
    provider: str | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ColumnMapper:
    """Choose a data generator for a table column from its name and type.

    ``map_column`` applies the following checks in order and returns the
    first one that matches:

    1. primary keys that are autoincrement or whose type contains ``INT``
       -> ``"skip"``
    2. an explicit user configuration that names a generator
    3. exact-name rules registered on this instance
    4. the built-in ``EXACT_MATCH_RULES`` / ``EXACT_MATCH_PARAMS``
    5. regex rules registered on this instance
    6. the built-in ``PATTERN_MATCH_RULES``
    7. columns that have a default value or are nullable -> ``"skip"``
    8. a fallback derived from the declared column type

    Column names are compared in lower case and column types in upper case.
    """

    # Lower-cased column name -> generator name.
    EXACT_MATCH_RULES: ClassVar[dict[str, str]] = {
        "email": "email",
        "phone": "phone",
        "telephone": "phone",
        "mobile": "phone",
        "address": "address",
        "name": "name",
        "username": "name",
        "user_name": "name",
        "nickname": "name",
        "first_name": "first_name",
        "last_name": "last_name",
        "full_name": "name",
        "company": "company",
        "organization": "company",
        "ip": "ipv4",
        "ip_address": "ipv4",
        "url": "url",
        "website": "url",
        "homepage": "url",
        "avatar": "url",
        "avatar_url": "url",
        "uuid": "uuid",
        "guid": "uuid",
        "token": "uuid",
        "password": "password",
        "passwd": "password",
        "secret": "password",
        "status": "choice",
        "state": "choice",
        "gender": "choice",
        "sex": "choice",
        "type": "choice",
        "level": "choice",
        "priority": "choice",
        "role": "choice",
        "age": "integer",
        "count": "integer",
        "quantity": "integer",
        "amount": "float",
        "price": "float",
        "cost": "float",
        "salary": "float",
        "balance": "float",
        "score": "float",
        "rating": "float",
        "weight": "float",
        "height": "float",
        "title": "sentence",
        "subject": "sentence",
        "headline": "sentence",
        "bio": "text",
        "biography": "text",
        "description": "text",
        "summary": "text",
        "content": "text",
        "body": "text",
        "comment": "text",
        "note": "text",
        "remark": "text",
        "latitude": "float",
        "longitude": "float",
        "lat": "float",
        "lng": "float",
        "city": "sentence",
        "country": "sentence",
        "zip_code": "string",
        "postal_code": "string",
    }

    # Generator parameters for the exact-name rules above.  Names without an
    # entry here get an empty parameter dict.
    EXACT_MATCH_PARAMS: ClassVar[dict[str, dict[str, Any]]] = {
        "age": {"min_value": 18, "max_value": 100},
        "count": {"min_value": 0, "max_value": 10000},
        "quantity": {"min_value": 1, "max_value": 100},
        "amount": {"min_value": 0.01, "max_value": 99999.99, "precision": 2},
        "price": {"min_value": 0.01, "max_value": 9999.99, "precision": 2},
        "cost": {"min_value": 0.01, "max_value": 9999.99, "precision": 2},
        "salary": {"min_value": 3000.0, "max_value": 100000.0, "precision": 2},
        "balance": {"min_value": 0.0, "max_value": 999999.99, "precision": 2},
        "score": {"min_value": 0.0, "max_value": 100.0, "precision": 1},
        "rating": {"min_value": 1.0, "max_value": 5.0, "precision": 1},
        "weight": {"min_value": 0.1, "max_value": 500.0, "precision": 1},
        "height": {"min_value": 50.0, "max_value": 250.0, "precision": 1},
        "latitude": {"min_value": -90.0, "max_value": 90.0, "precision": 6},
        "longitude": {"min_value": -180.0, "max_value": 180.0, "precision": 6},
        "lat": {"min_value": -90.0, "max_value": 90.0, "precision": 6},
        "lng": {"min_value": -180.0, "max_value": 180.0, "precision": 6},
        "status": {"choices": [0, 1]},
        "state": {"choices": [0, 1, 2]},
        "gender": {"choices": ["male", "female", "other"]},
        "sex": {"choices": ["male", "female"]},
        "type": {"choices": [1, 2, 3]},
        "level": {"choices": [1, 2, 3, 4, 5]},
        "priority": {"choices": ["low", "medium", "high"]},
        "role": {"choices": ["admin", "user", "guest"]},
        "bio": {"min_length": 50, "max_length": 200},
        "description": {"min_length": 100, "max_length": 500},
        "content": {"min_length": 200, "max_length": 1000},
        "comment": {"min_length": 10, "max_length": 200},
    }

    # (regex, generator name, params) triples, tried in order with
    # ``re.match`` against the lower-cased column name; the first hit wins.
    PATTERN_MATCH_RULES: ClassVar[list[tuple[str, str, dict[str, Any]]]] = [
        (r"^id$", "autoincrement", {}),
        (r".*_id$", "foreign_key_or_integer", {}),
        (r".*_ids$", "json", {}),
        (r".*_at$", "datetime", {}),
        (r".*_date$", "date", {}),
        (r".*_time$", "datetime", {}),
        (r".*_timestamp$", "timestamp", {}),
        (r"^created$", "datetime", {}),
        (r"^updated$", "datetime", {}),
        (r"^deleted$", "datetime", {}),
        (r".*_count$|.*_num$|.*_number$", "integer", {"min_value": 0, "max_value": 10000}),
        (r".*_amount$|.*_price$|.*_cost$|.*_fee$", "float", {"min_value": 0.01, "max_value": 99999.99, "precision": 2}),
        (r".*_rate$|.*_ratio$|.*_percent$", "float", {"min_value": 0.0, "max_value": 1.0, "precision": 4}),
        (r"^is_.*|^has_.*|^can_.*|^should_.*|^enable.*|^disable.*", "boolean", {}),
        (r".*_code$", "string", {"min_length": 6, "max_length": 12, "charset": "alphanumeric"}),
        (r".*_name$", "name", {}),
        (r".*_email$", "email", {}),
        (r".*_phone$|.*_tel$|.*_mobile$", "phone", {}),
        (r".*_url$|.*_link$|.*_href$", "url", {}),
        (r".*_path$|.*_file$", "string", {"min_length": 10, "max_length": 100}),
        (r".*_key$|.*_token$|.*_hash$", "uuid", {}),
        (r".*_password$|.*_passwd$|.*_secret$", "password", {}),
        (r".*_address$", "address", {}),
        (r".*_description$|.*_desc$|.*_text$|.*_content$|.*_body$", "text", {"min_length": 50, "max_length": 300}),
        (r".*_title$|.*_subject$|.*_headline$", "sentence", {}),
    ]

    # Declared-type prefix -> (generator name, default params).  Lookup is by
    # ``str.startswith`` on the upper-cased type with any "(...)" removed.
    TYPE_FALLBACK_RULES: ClassVar[dict[str, tuple[str, dict[str, Any]]]] = {
        "INTEGER": ("integer", {"min_value": 0, "max_value": 999999}),
        "INT8": ("integer", {"min_value": 0, "max_value": 255}),
        "INT16": ("integer", {"min_value": 0, "max_value": 65535}),
        "INT32": ("integer", {"min_value": 0, "max_value": 2147483647}),
        "INT64": ("integer", {"min_value": 0, "max_value": 999999999}),
        "INT": ("integer", {"min_value": 0, "max_value": 999999}),
        "TINYINT": ("integer", {"min_value": 0, "max_value": 255}),
        "SMALLINT": ("integer", {"min_value": 0, "max_value": 32767}),
        "BIGINT": ("integer", {"min_value": 0, "max_value": 999999999}),
        "REAL": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "FLOAT": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "DOUBLE": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "DECIMAL": ("float", {"min_value": 0.0, "max_value": 999999.0, "precision": 2}),
        "NUMERIC": ("float", {"min_value": 0.0, "max_value": 999999.0}),
        "TEXT": ("string", {"min_length": 5, "max_length": 50}),
        "BLOB": ("bytes", {"length": 32}),
        "BOOLEAN": ("boolean", {}),
        "DATE": ("date", {}),
        "DATETIME": ("datetime", {}),
        "TIMESTAMP": ("timestamp", {}),
        "VARCHAR": ("string", {}),
        "CHAR": ("string", {}),
    }

    def __init__(self) -> None:
        """Create a mapper with no instance-specific rules registered."""
        self._custom_exact_rules: dict[str, tuple[str, dict[str, Any]]] = {}
        self._custom_pattern_rules: list[tuple[str, str, dict[str, Any]]] = []

    def register_exact_rule(self, column_name: str, generator: str, params: dict[str, Any] | None = None) -> None:
        """Register (or replace) an exact-name rule for this instance.

        Args:
            column_name: Column name to match; stored lower-cased.
            generator: Generator name to use for that column.
            params: Generator parameters; ``None`` means no parameters.
        """
        self._custom_exact_rules[column_name.lower()] = (generator, params or {})

    def register_pattern_rule(self, pattern: str, generator: str, params: dict[str, Any] | None = None) -> None:
        """Append a regex rule for this instance.

        Args:
            pattern: Regular expression applied with ``re.match`` to the
                lower-cased column name.
            generator: Generator name to use when the pattern matches.
            params: Generator parameters; ``None`` means no parameters.
        """
        self._custom_pattern_rules.append((pattern, generator, params or {}))

    def map_column(self, column_info: ColumnInfo, user_config: Any = None) -> GeneratorSpec:
        """Return the generator spec for a single column.

        Args:
            column_info: Column metadata.  The attributes read here are
                ``name``, ``type``, ``is_primary_key``, ``is_autoincrement``,
                ``default`` and ``nullable``.
            user_config: Optional per-column configuration object.  It is only
                honoured when it is truthy and has a truthy ``generator``
                attribute; ``provider``, ``params`` and ``null_ratio`` are
                read from it when present.

        Returns:
            A ``GeneratorSpec`` whose ``params`` dict is a fresh (shallow)
            copy, so mutating it does not alter the rule tables.
        """
        column_name = column_info.name.lower()
        # A missing/empty declared type is treated as TEXT.
        column_type = column_info.type.upper() if column_info.type else "TEXT"

        # The substring test on "INT" also covers INTEGER, BIGINT, SMALLINT, ...
        if column_info.is_primary_key and (column_info.is_autoincrement or "INT" in column_type):
            return GeneratorSpec(generator_name="skip")

        if user_config and getattr(user_config, "generator", None):
            provider = getattr(user_config, "provider", None)
            if provider:
                # Enum-like objects expose ``.value``; plain strings are
                # accepted unchanged instead of raising AttributeError.
                provider = getattr(provider, "value", provider)
            else:
                provider = None
            user_params = getattr(user_config, "params", None)
            return GeneratorSpec(
                generator_name=user_config.generator,
                # ``params`` may be absent or None on the config object.
                params=dict(user_params) if user_params else {},
                null_ratio=getattr(user_config, "null_ratio", 0.0),
                provider=provider,
            )

        if column_name in self._custom_exact_rules:
            gen, params = self._custom_exact_rules[column_name]
            return GeneratorSpec(generator_name=gen, params=dict(params))

        if column_name in self.EXACT_MATCH_RULES:
            return GeneratorSpec(
                generator_name=self.EXACT_MATCH_RULES[column_name],
                params=dict(self.EXACT_MATCH_PARAMS.get(column_name, {})),
            )

        # Instance-registered patterns take precedence over the built-in ones.
        for pattern, gen, params in (*self._custom_pattern_rules, *self.PATTERN_MATCH_RULES):
            if re.match(pattern, column_name):
                return GeneratorSpec(generator_name=gen, params=dict(params))

        # No name-based rule matched: leave columns alone when they have a
        # default value or accept NULL.
        if column_info.default is not None or column_info.nullable:
            return GeneratorSpec(generator_name="skip")

        return self._type_faithful_fallback(column_type)

    def _type_faithful_fallback(self, column_type: str) -> GeneratorSpec:
        """Pick a generator from the declared column type alone.

        Args:
            column_type: Upper-cased declared type, e.g. ``"VARCHAR(20)"``.

        Returns:
            The spec for the longest ``TYPE_FALLBACK_RULES`` prefix that the
            base type starts with, or a 5-50 character ``"string"`` spec when
            no prefix matches.  A single-number length such as ``(20)`` caps
            the length of ``"string"`` and ``"bytes"`` generators.
        """
        # Only a single integer in parentheses counts as a length;
        # "DECIMAL(10,2)" therefore yields no length.
        length_match = re.search(r"\((\d+)\)", column_type)
        max_length = int(length_match.group(1)) if length_match else None

        base_type = re.sub(r"\(.*\)", "", column_type).strip()

        # Try the longest prefixes first so that e.g. "DATETIME" is not
        # swallowed by the shorter "DATE" entry.  ``sorted`` is stable, so
        # equally long prefixes keep their declaration order.
        for type_prefix in sorted(self.TYPE_FALLBACK_RULES, key=len, reverse=True):
            if not base_type.startswith(type_prefix):
                continue
            gen, default_params = self.TYPE_FALLBACK_RULES[type_prefix]
            params = dict(default_params)
            if max_length is not None:
                if gen == "string":
                    # Keep min_length <= max_length even for a declared length of 0.
                    params["min_length"] = min(1, max_length)
                    params["max_length"] = max_length
                elif gen == "bytes":
                    params["length"] = max_length
            return GeneratorSpec(generator_name=gen, params=params)

        return GeneratorSpec(generator_name="string", params={"min_length": 5, "max_length": 50})

    def map_columns(
        self,
        columns: list[ColumnInfo],
        user_configs: dict[str, Any] | None = None,
    ) -> dict[str, GeneratorSpec]:
        """Map every column in ``columns`` to a generator spec.

        Args:
            columns: Column metadata objects to map.
            user_configs: Optional per-column configuration keyed by the
                column's name exactly as reported in ``ColumnInfo.name``.

        Returns:
            A dict from column name (original casing) to its spec.
        """
        user_configs = user_configs or {}
        return {col.name: self.map_column(col, user_configs.get(col.name)) for col in columns}
|