sqlseed 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. sqlseed/__init__.py +121 -0
  2. sqlseed/_utils/__init__.py +11 -0
  3. sqlseed/_utils/logger.py +30 -0
  4. sqlseed/_utils/metrics.py +45 -0
  5. sqlseed/_utils/progress.py +14 -0
  6. sqlseed/_utils/schema_helpers.py +51 -0
  7. sqlseed/_utils/sql_safe.py +45 -0
  8. sqlseed/_version.py +1 -0
  9. sqlseed/cli/__init__.py +3 -0
  10. sqlseed/cli/main.py +316 -0
  11. sqlseed/config/__init__.py +14 -0
  12. sqlseed/config/loader.py +66 -0
  13. sqlseed/config/models.py +99 -0
  14. sqlseed/config/snapshot.py +91 -0
  15. sqlseed/core/__init__.py +14 -0
  16. sqlseed/core/column_dag.py +108 -0
  17. sqlseed/core/constraints.py +116 -0
  18. sqlseed/core/expression.py +71 -0
  19. sqlseed/core/mapper.py +257 -0
  20. sqlseed/core/orchestrator.py +578 -0
  21. sqlseed/core/relation.py +124 -0
  22. sqlseed/core/result.py +23 -0
  23. sqlseed/core/schema.py +97 -0
  24. sqlseed/core/transform.py +27 -0
  25. sqlseed/database/__init__.py +14 -0
  26. sqlseed/database/_protocol.py +72 -0
  27. sqlseed/database/optimizer.py +96 -0
  28. sqlseed/database/raw_sqlite_adapter.py +197 -0
  29. sqlseed/database/sqlite_utils_adapter.py +183 -0
  30. sqlseed/generators/__init__.py +11 -0
  31. sqlseed/generators/_protocol.py +73 -0
  32. sqlseed/generators/base_provider.py +448 -0
  33. sqlseed/generators/faker_provider.py +157 -0
  34. sqlseed/generators/mimesis_provider.py +203 -0
  35. sqlseed/generators/registry.py +86 -0
  36. sqlseed/generators/stream.py +157 -0
  37. sqlseed/py.typed +0 -0
  38. sqlseed-0.1.0.dist-info/METADATA +934 -0
  39. sqlseed-0.1.0.dist-info/RECORD +42 -0
  40. sqlseed-0.1.0.dist-info/WHEEL +4 -0
  41. sqlseed-0.1.0.dist-info/entry_points.txt +6 -0
  42. sqlseed-0.1.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,448 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import random
5
+ import string
6
+ import uuid
7
+ from datetime import datetime, timedelta
8
+ from typing import Any
9
+
10
+
11
class BaseProvider:
    """Built-in data generator that needs no external data sources.

    All randomness flows through a private ``random.Random`` instance, so
    calling :meth:`set_seed` makes every ``generate_*`` method reproducible.
    ``generate_pattern`` additionally imports the third-party ``rstr``
    package lazily.
    """

    # Word pools live at class level so they are built once at import time
    # instead of on every call.
    _FIRST_NAMES = (
        "James", "Mary", "John", "Patricia", "Robert", "Jennifer",
        "Michael", "Linda", "William", "Elizabeth", "David", "Barbara",
        "Richard", "Susan", "Joseph", "Jessica", "Thomas", "Sarah",
        "Charles", "Karen", "Christopher", "Lisa", "Daniel", "Nancy",
        "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra",
        "Donald", "Ashley", "Steven", "Kimberly", "Paul", "Emily",
        "Andrew", "Donna", "Joshua", "Michelle", "Kenneth", "Carol",
        "Kevin", "Amanda", "Brian", "Dorothy", "George", "Melissa",
        "Timothy", "Deborah", "Ronald", "Stephanie", "Edward", "Rebecca",
        "Jason", "Sharon", "Jeffrey", "Laura", "Ryan", "Cynthia",
    )
    _LAST_NAMES = (
        "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia",
        "Miller", "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez",
        "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore",
        "Jackson", "Martin", "Lee", "Perez", "Thompson", "White",
        "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
        "Walker", "Young", "Allen", "King", "Wright", "Scott",
        "Torres", "Nguyen", "Hill", "Flores", "Green", "Adams",
        "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell",
        "Carter", "Roberts", "Gomez", "Phillips", "Evans", "Turner",
        "Diaz", "Parker", "Cruz", "Edwards", "Collins", "Reyes",
    )
    # generate_first_name()/generate_last_name() draw from the first 30
    # entries only; keeping that pool size keeps seeded output unchanged.
    _SHORT_FIRST_NAMES = _FIRST_NAMES[:30]
    _SHORT_LAST_NAMES = _LAST_NAMES[:30]

    _EMAIL_DOMAINS = ("example.com", "test.org", "mail.net", "demo.io", "sample.dev")
    _STREETS = (
        "Main St", "Oak Ave", "Pine Rd", "Elm Blvd", "Cedar Ln",
        "Maple Dr", "Washington Ave", "Park Rd", "Lake Dr", "Hill St",
    )
    # A range is indexable, so rng.choice() can pick from it without
    # materialising ~10k integers per call.
    _STREET_NUMBERS = range(1, 9999)
    _CITIES = (
        "Springfield", "Portland", "Franklin", "Clinton", "Madison",
        "Georgetown", "Arlington", "Salem", "Fairview", "Chester",
    )
    _STATES = ("CA", "NY", "TX", "FL", "IL", "PA", "OH", "GA", "NC", "MI")
    _COMPANY_PREFIXES = ("Global", "Prime", "Alpha", "Elite", "Tech", "Nova", "Apex", "Core")
    _COMPANY_SUFFIXES = ("Corp", "Inc", "LLC", "Ltd", "Group", "Systems", "Solutions", "Labs")
    _URL_DOMAINS = ("example", "test", "demo", "sample", "mysite")
    _URL_TLDS = ("com", "org", "net", "io", "dev")
    _URL_PATHS = ("", "/home", "/about", "/products", "/blog", "/api/v1")
    _LOREM_WORDS = (
        "lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
        "adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
        "incididunt", "ut", "labore", "et", "dolore", "magna",
        "aliqua", "enim", "ad", "minim", "veniam", "quis",
        "nostrud", "exercitation", "ullamco", "laboris", "nisi",
    )
    _SUBJECTS = ("The system", "A user", "The process", "An event", "The service")
    _VERBS = ("completed", "started", "failed", "succeeded", "processed")
    _OBJECTS = ("the operation", "a request", "the task", "an update", "the transaction")

    _DEFAULT_CHARS = string.ascii_letters + string.digits + " _-"
    _NAMED_CHARSETS = {
        "alphanumeric": string.ascii_letters + string.digits,
        "alpha": string.ascii_letters,
        "digits": string.digits,
    }
    _PASSWORD_CHARS = string.ascii_letters + string.digits + string.punctuation
    _EPOCH = datetime(1970, 1, 1)

    def __init__(self) -> None:
        self._rng = random.Random()
        self._locale: str = "en_US"

    @property
    def name(self) -> str:
        """Identifier of this provider."""
        return "base"

    def set_locale(self, locale: str) -> None:
        """Record the requested locale (the built-in pools do not vary by it)."""
        self._locale = locale

    def set_seed(self, seed: int) -> None:
        """Restart the internal RNG from ``seed`` for reproducible output."""
        self._rng = random.Random(seed)

    def generate_string(
        self,
        *,
        min_length: int = 1,
        max_length: int = 100,
        charset: str | None = None,
    ) -> str:
        """Return a random string of ``min_length``..``max_length`` characters.

        ``charset`` may be ``"alphanumeric"``, ``"alpha"``, ``"digits"``, or any
        other string, which is then used verbatim as the pool of characters.
        ``None`` selects letters, digits, space, underscore and hyphen.
        """
        if charset is None:
            chars = self._DEFAULT_CHARS
        else:
            chars = self._NAMED_CHARSETS.get(charset, charset)
        length = self._rng.randint(min_length, max_length)
        return "".join(self._rng.choice(chars) for _ in range(length))

    def generate_integer(self, *, min_value: int = 0, max_value: int = 999999) -> int:
        """Return a random integer in ``[min_value, max_value]``."""
        return self._rng.randint(min_value, max_value)

    def generate_float(
        self,
        *,
        min_value: float = 0.0,
        max_value: float = 999999.0,
        precision: int = 2,
    ) -> float:
        """Return a random float between the bounds, rounded to ``precision`` digits."""
        return round(self._rng.uniform(min_value, max_value), precision)

    def generate_boolean(self) -> bool:
        """Return ``True`` or ``False`` with equal probability."""
        return self._rng.choice((True, False))

    def generate_bytes(self, *, length: int = 16) -> bytes:
        """Return ``length`` random bytes."""
        return self._rng.randbytes(length)

    def generate_name(self) -> str:
        """Return a ``"First Last"`` full name."""
        first = self._rng.choice(self._FIRST_NAMES)
        last = self._rng.choice(self._LAST_NAMES)
        return f"{first} {last}"

    def generate_first_name(self) -> str:
        """Return a random first name."""
        return self._rng.choice(self._SHORT_FIRST_NAMES)

    def generate_last_name(self) -> str:
        """Return a random last name."""
        return self._rng.choice(self._SHORT_LAST_NAMES)

    def generate_email(self) -> str:
        """Return an address shaped like ``first.last<1-999>@domain``."""
        first = self.generate_first_name().lower()
        last = self.generate_last_name().lower()
        number = self._rng.randint(1, 999)
        domain = self._rng.choice(self._EMAIL_DOMAINS)
        return f"{first}.{last}{number}@{domain}"

    def generate_phone(self) -> str:
        """Return a phone number formatted as ``AAA-MMM-EEEE``."""
        area = self._rng.randint(200, 999)
        mid = self._rng.randint(100, 999)
        end = self._rng.randint(1000, 9999)
        return f"{area}-{mid}-{end}"

    def generate_address(self) -> str:
        """Return a one-line ``"<number> <street>, <city>, <state>"`` address."""
        num = self._rng.choice(self._STREET_NUMBERS)
        street = self._rng.choice(self._STREETS)
        city = self._rng.choice(self._CITIES)
        state = self._rng.choice(self._STATES)
        return f"{num} {street}, {city}, {state}"

    def generate_company(self) -> str:
        """Return a two-word company name (prefix + suffix)."""
        prefix = self._rng.choice(self._COMPANY_PREFIXES)
        suffix = self._rng.choice(self._COMPANY_SUFFIXES)
        return f"{prefix} {suffix}"

    def generate_url(self) -> str:
        """Return an ``https://www.<domain>.<tld><path>`` URL."""
        domain = self._rng.choice(self._URL_DOMAINS)
        tld = self._rng.choice(self._URL_TLDS)
        path = self._rng.choice(self._URL_PATHS)
        return f"https://www.{domain}.{tld}{path}"

    def generate_ipv4(self) -> str:
        """Return a dotted-quad IPv4 address string."""
        o1 = self._rng.randint(1, 255)
        o2 = self._rng.randint(0, 255)
        o3 = self._rng.randint(0, 255)
        o4 = self._rng.randint(1, 254)
        return f"{o1}.{o2}.{o3}.{o4}"

    def generate_uuid(self) -> str:
        """Return a version-4 UUID string built from the seeded RNG."""
        return str(uuid.UUID(bytes=self._rng.randbytes(16), version=4))

    @staticmethod
    def _resolve_end_year(start_year: int, end_year: int | None) -> int:
        """Default ``end_year`` to the current year and never let it precede ``start_year``."""
        if end_year is None:
            end_year = datetime.now().year
        return max(end_year, start_year)

    def generate_date(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Return a ``YYYY-MM-DD`` date between Jan 1 of ``start_year`` and Dec 31 of ``end_year``."""
        end_year = self._resolve_end_year(start_year, end_year)
        start = datetime(start_year, 1, 1)
        end = datetime(end_year, 12, 31)
        # end_year >= start_year, so the span is always at least 364 days.
        span_days = (end - start).days
        picked = start + timedelta(days=self._rng.randint(0, span_days))
        return picked.strftime("%Y-%m-%d")

    def generate_datetime(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Return a ``YYYY-MM-DD HH:MM:SS`` datetime within the given year range."""
        end_year = self._resolve_end_year(start_year, end_year)
        start = datetime(start_year, 1, 1)
        end = datetime(end_year, 12, 31, 23, 59, 59)
        span_seconds = (end - start).total_seconds()
        picked = start + timedelta(seconds=self._rng.uniform(0, span_seconds))
        return picked.strftime("%Y-%m-%d %H:%M:%S")

    def generate_timestamp(self) -> int:
        """Return a Unix timestamp between 2000-01-01 and 2030-12-31 23:59:59 (UTC).

        The bounds are interpreted as UTC and converted with plain epoch
        arithmetic, so a given seed yields the same integer on every host
        regardless of its local time zone.
        """
        start = datetime(2000, 1, 1)
        end = datetime(2030, 12, 31, 23, 59, 59)
        span_seconds = (end - start).total_seconds()
        picked = start + timedelta(seconds=self._rng.uniform(0, span_seconds))
        return int((picked - self._EPOCH).total_seconds())

    def generate_text(self, *, min_length: int = 50, max_length: int = 200) -> str:
        """Return space-separated lorem-ipsum words cut to a random target length."""
        length = self._rng.randint(min_length, max_length)
        parts: list[str] = []
        size = 0
        while size < length:
            word = self._rng.choice(self._LOREM_WORDS)
            # Every word after the first is preceded by a single space.
            size += len(word) + (1 if parts else 0)
            parts.append(word)
        return " ".join(parts)[:length]

    def generate_sentence(self) -> str:
        """Return a short ``subject verb object.`` sentence."""
        subject = self._rng.choice(self._SUBJECTS)
        verb = self._rng.choice(self._VERBS)
        obj = self._rng.choice(self._OBJECTS)
        return f"{subject} {verb} {obj}."

    def generate_password(self, *, length: int = 16) -> str:
        """Return ``length`` characters drawn from letters, digits and punctuation.

        Uses the seedable, non-cryptographic RNG; intended for test data only.
        """
        return "".join(self._rng.choice(self._PASSWORD_CHARS) for _ in range(length))

    def generate_choice(self, choices: list[Any]) -> Any:
        """Return one random element of ``choices``."""
        return self._rng.choice(choices)

    def generate_json(self, *, schema: dict[str, Any] | None = None) -> str:
        """Return a JSON document as a string.

        Without ``schema`` the document is an object with ``id``, ``name`` and
        ``active`` keys; otherwise it is generated from the JSON-schema-like
        ``schema`` (see :meth:`_generate_from_schema`).
        """
        if schema is None:
            data: Any = {
                "id": self.generate_integer(min_value=1, max_value=999999),
                "name": self.generate_name(),
                "active": self.generate_boolean(),
            }
        else:
            data = self._generate_from_schema(schema)
        return json.dumps(data)

    def generate_pattern(self, *, regex: str) -> str:
        """Return a string matching ``regex``, generated by ``rstr`` with the seeded RNG."""
        import rstr

        return rstr.Rstr(self._rng).xeger(regex)

    def _generate_from_schema(self, schema: dict[str, Any]) -> Any:
        """Build a value for ``schema`` by dispatching on its ``"type"`` key.

        Supports ``string``, ``integer``, ``number``, ``boolean``, ``array``
        (1-5 items of ``schema["items"]``) and ``object`` (one value per entry
        of ``schema["properties"]``). Unknown types fall back to a string.
        """
        schema_type = schema.get("type", "string")
        if schema_type == "string":
            return self.generate_string(min_length=5, max_length=20)
        if schema_type == "integer":
            return self.generate_integer()
        if schema_type == "number":
            return self.generate_float()
        if schema_type == "boolean":
            return self.generate_boolean()
        if schema_type == "array":
            items = schema.get("items", {"type": "string"})
            count = self._rng.randint(1, 5)
            return [self._generate_from_schema(items) for _ in range(count)]
        if schema_type == "object":
            properties = schema.get("properties", {})
            return {key: self._generate_from_schema(sub) for key, sub in properties.items()}
        return self.generate_string()
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from sqlseed._utils.logger import get_logger
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
class FakerProvider:
    """Faker-based data generator adapter.

    Wraps a ``faker.Faker`` instance and exposes ``generate_*`` methods that
    delegate to it. The seed given to :meth:`set_seed` is remembered so it
    survives a later :meth:`set_locale` and also drives ``generate_pattern``.
    """

    def __init__(self) -> None:
        self._faker: Any = None
        self._locale: str = "en_US"
        self._seed: int | None = None
        # RNG handed to rstr; created lazily on first generate_pattern() call
        # and discarded whenever the seed changes.
        self._pattern_rng: Any = None
        self._init_faker()

    def _init_faker(self) -> None:
        """(Re)create the Faker instance for the current locale and seed.

        Raises:
            ImportError: if the ``faker`` package is not installed.
        """
        try:
            from faker import Faker
        except ImportError:
            raise ImportError("Faker is not installed. Install it with: pip install sqlseed[faker]") from None

        self._faker = Faker(self._locale)
        # A fresh Faker instance is unseeded; re-apply the remembered seed so
        # changing the locale does not silently drop reproducibility.
        if self._seed is not None:
            self._faker.seed_instance(self._seed)

    @property
    def name(self) -> str:
        """Identifier of this provider."""
        return "faker"

    def set_locale(self, locale: str) -> None:
        """Switch to ``locale`` by rebuilding the underlying Faker instance."""
        self._locale = locale
        self._init_faker()

    def set_seed(self, seed: int) -> None:
        """Seed the Faker instance and the pattern RNG for reproducible output."""
        self._seed = seed
        self._pattern_rng = None  # rebuilt from the new seed on next use
        self._faker.seed_instance(seed)

    def generate_string(
        self,
        *,
        min_length: int = 1,
        max_length: int = 100,
        charset: str | None = None,
    ) -> str:
        """Return a random string of ``min_length``..``max_length`` characters.

        ``charset`` may be ``"alphanumeric"``, ``"alpha"``, ``"digits"``, or any
        other string, which is then used verbatim as the pool of characters.
        ``None`` selects letters, digits, space, underscore and hyphen.
        """
        import string

        if charset == "alphanumeric":
            chars = string.ascii_letters + string.digits
        elif charset == "alpha":
            chars = string.ascii_letters
        elif charset == "digits":
            chars = string.digits
        elif charset is not None:
            chars = charset
        else:
            chars = string.ascii_letters + string.digits + " _-"
        length = self._faker.random_int(min=min_length, max=max_length)
        return "".join(self._faker.random_element(chars) for _ in range(length))

    def generate_integer(self, *, min_value: int = 0, max_value: int = 999999) -> int:
        """Return a random integer in ``[min_value, max_value]``."""
        return self._faker.random_int(min=min_value, max=max_value)

    def generate_float(
        self,
        *,
        min_value: float = 0.0,
        max_value: float = 999999.0,
        precision: int = 2,
    ) -> float:
        """Return a random float between the bounds, rounded to ``precision`` digits."""
        return round(self._faker.pyfloat(min_value=min_value, max_value=max_value, right_digits=precision), precision)

    def generate_boolean(self) -> bool:
        """Return a random boolean."""
        return self._faker.boolean()

    def generate_bytes(self, *, length: int = 16) -> bytes:
        """Return ``length`` random bytes."""
        return self._faker.binary(length=length)

    def generate_name(self) -> str:
        """Return a full name."""
        return self._faker.name()

    def generate_first_name(self) -> str:
        """Return a first name."""
        return self._faker.first_name()

    def generate_last_name(self) -> str:
        """Return a last name."""
        return self._faker.last_name()

    def generate_email(self) -> str:
        """Return an email address."""
        return self._faker.email()

    def generate_phone(self) -> str:
        """Return a phone number."""
        return self._faker.phone_number()

    def generate_address(self) -> str:
        """Return an address on one line (Faker's newlines become ``", "``)."""
        return self._faker.address().replace("\n", ", ")

    def generate_company(self) -> str:
        """Return a company name."""
        return self._faker.company()

    def generate_url(self) -> str:
        """Return a URL."""
        return self._faker.url()

    def generate_ipv4(self) -> str:
        """Return an IPv4 address string."""
        return self._faker.ipv4()

    def generate_uuid(self) -> str:
        """Return a version-4 UUID as produced by Faker's ``uuid4()``."""
        return self._faker.uuid4()

    def generate_date(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Return a ``YYYY-MM-DD`` date between Jan 1 of ``start_year`` and Dec 31 of ``end_year``.

        ``end_year`` defaults to the current year and is raised to
        ``start_year`` if it would otherwise precede it.
        """
        from datetime import datetime

        if end_year is None:
            end_year = datetime.now().year
        end_year = max(end_year, start_year)
        start = datetime(start_year, 1, 1).date()
        end = datetime(end_year, 12, 31).date()
        return self._faker.date_between_dates(date_start=start, date_end=end).strftime("%Y-%m-%d")

    def generate_datetime(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Return a ``YYYY-MM-DD HH:MM:SS`` datetime within the given year range.

        ``end_year`` defaults to the current year and is raised to
        ``start_year`` if it would otherwise precede it.
        """
        from datetime import datetime

        if end_year is None:
            end_year = datetime.now().year
        end_year = max(end_year, start_year)
        start = datetime(start_year, 1, 1)
        end = datetime(end_year, 12, 31, 23, 59, 59)
        dt = self._faker.date_time_between_dates(datetime_start=start, datetime_end=end)
        return dt.strftime("%Y-%m-%d %H:%M:%S")

    def generate_timestamp(self) -> int:
        """Return a Unix timestamp for a datetime from Faker's ``date_time_this_decade()``.

        NOTE(review): ``time.mktime`` interprets the value in the host's local
        time zone, so results are not portable across time zones.
        """
        import time

        dt = self._faker.date_time_this_decade()
        return int(time.mktime(dt.timetuple()))

    def generate_text(self, *, min_length: int = 50, max_length: int = 200) -> str:
        """Return Faker text of at least ``min_length`` and at most ``max_length`` characters.

        Faker's ``text()`` refuses to generate fewer than 5 characters, so each
        request is floored at 5 and the final result is truncated instead.
        """
        min_request = 5
        text = self._faker.text(max_nb_chars=max(max_length, min_request))
        while len(text) < min_length:
            remaining = max_length - len(text)
            text += " " + self._faker.text(max_nb_chars=max(remaining, min_request))
        return text[:max_length]

    def generate_sentence(self) -> str:
        """Return a sentence."""
        return self._faker.sentence()

    def generate_password(self, *, length: int = 16) -> str:
        """Return a password of ``length`` characters."""
        return self._faker.password(length=length)

    def generate_choice(self, choices: list[Any]) -> Any:
        """Return one random element of ``choices``."""
        return self._faker.random_element(choices)

    def generate_json(self, *, schema: dict[str, Any] | None = None) -> str:
        """Return a JSON string from Faker's ``json()`` provider.

        NOTE(review): ``schema`` is forwarded as Faker's ``data_columns``
        specification; confirm callers pass that format rather than a
        JSON-schema dict.
        """
        return self._faker.json(data_columns=schema)

    def generate_pattern(self, *, regex: str) -> str:
        """Return a string matching ``regex``, generated by ``rstr``.

        The RNG is created once per seed and reused, so a seeded provider
        yields a reproducible *sequence* rather than the same value each call.
        """
        import random

        import rstr

        if self._pattern_rng is None:
            self._pattern_rng = random.Random(self._seed)
        return rstr.Rstr(self._pattern_rng).xeger(regex)