sqlseed 0.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. sqlseed/__init__.py +121 -0
  2. sqlseed/_utils/__init__.py +11 -0
  3. sqlseed/_utils/logger.py +30 -0
  4. sqlseed/_utils/metrics.py +45 -0
  5. sqlseed/_utils/progress.py +14 -0
  6. sqlseed/_utils/schema_helpers.py +51 -0
  7. sqlseed/_utils/sql_safe.py +45 -0
  8. sqlseed/_version.py +1 -0
  9. sqlseed/cli/__init__.py +3 -0
  10. sqlseed/cli/main.py +316 -0
  11. sqlseed/config/__init__.py +14 -0
  12. sqlseed/config/loader.py +66 -0
  13. sqlseed/config/models.py +99 -0
  14. sqlseed/config/snapshot.py +91 -0
  15. sqlseed/core/__init__.py +14 -0
  16. sqlseed/core/column_dag.py +108 -0
  17. sqlseed/core/constraints.py +116 -0
  18. sqlseed/core/expression.py +71 -0
  19. sqlseed/core/mapper.py +257 -0
  20. sqlseed/core/orchestrator.py +578 -0
  21. sqlseed/core/relation.py +124 -0
  22. sqlseed/core/result.py +23 -0
  23. sqlseed/core/schema.py +97 -0
  24. sqlseed/core/transform.py +27 -0
  25. sqlseed/database/__init__.py +14 -0
  26. sqlseed/database/_protocol.py +72 -0
  27. sqlseed/database/optimizer.py +96 -0
  28. sqlseed/database/raw_sqlite_adapter.py +197 -0
  29. sqlseed/database/sqlite_utils_adapter.py +183 -0
  30. sqlseed/generators/__init__.py +11 -0
  31. sqlseed/generators/_protocol.py +73 -0
  32. sqlseed/generators/base_provider.py +448 -0
  33. sqlseed/generators/faker_provider.py +157 -0
  34. sqlseed/generators/mimesis_provider.py +203 -0
  35. sqlseed/generators/registry.py +86 -0
  36. sqlseed/generators/stream.py +157 -0
  37. sqlseed/py.typed +0 -0
  38. sqlseed-0.1.0.dist-info/METADATA +934 -0
  39. sqlseed-0.1.0.dist-info/RECORD +42 -0
  40. sqlseed-0.1.0.dist-info/WHEEL +4 -0
  41. sqlseed-0.1.0.dist-info/entry_points.txt +6 -0
  42. sqlseed-0.1.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from typing_extensions import Self
6
+
7
+ from sqlseed._utils.logger import get_logger
8
+ from sqlseed._utils.sql_safe import quote_identifier
9
+ from sqlseed.database._protocol import ColumnInfo, ForeignKeyInfo, IndexInfo
10
+ from sqlseed.database.optimizer import PragmaOptimizer
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterator
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
class SQLiteUtilsAdapter:
    """SQLite adapter backed by the ``sqlite-utils`` library.

    Provides schema inspection, sampling, batched inserts, table clearing and
    pragma tuning on top of a single ``sqlite_utils.Database`` handle. The
    instance can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(self) -> None:
        """Create an adapter that is not yet connected to any database."""
        self._db_path: str = ""
        self._db: Any = None
        self._optimizer: PragmaOptimizer | None = None

    def connect(self, db_path: str) -> None:
        """Open ``db_path`` and prepare the pragma optimizer.

        Args:
            db_path: Path handed straight to ``sqlite_utils.Database``.
        """
        # Function-scope import: sqlite_utils is loaded only when connect() runs.
        import sqlite_utils

        self._db_path = db_path
        self._db = sqlite_utils.Database(db_path)
        # The optimizer drives pragmas through this adapter's own helpers.
        self._optimizer = PragmaOptimizer(
            execute_fn=self._execute_pragma,
            fetch_pragma_fn=self._fetch_pragma,
        )
        logger.debug("Connected to database via sqlite-utils", db_path=db_path)

    def close(self) -> None:
        """Close the database handle if one is open; otherwise do nothing."""
        if self._db is None:
            return
        self._db.close()
        self._db = None
        logger.debug("Closed sqlite-utils connection", db_path=self._db_path)

    def get_table_names(self) -> list[str]:
        """Return the table names reported by the database as a list."""
        names = self._db.table_names()
        return list(names)

    def get_column_info(self, table_name: str) -> list[ColumnInfo]:
        """Describe every column of ``table_name``.

        A column is reported as nullable only when it is not a primary key,
        is not the source column of a foreign key, and its ``notnull`` flag is
        falsy. Autoincrement detection is attempted for primary keys only.

        Args:
            table_name: Table to inspect.

        Returns:
            One ``ColumnInfo`` per column, in the order the table lists them.
        """
        table = self._db[table_name]
        pk_names = self.get_primary_keys(table_name)
        fk_names = {fk.column for fk in self.get_foreign_keys(table_name)}

        def describe(col: Any) -> ColumnInfo:
            name = col.name
            is_pk = name in pk_names
            return ColumnInfo(
                name=name,
                # Non-string type objects are rendered through str().
                type=col.type if isinstance(col.type, str) else str(col.type),
                nullable=not (is_pk or name in fk_names or col.notnull),
                default=col.default_value,
                is_primary_key=is_pk,
                is_autoincrement=is_pk and self._is_autoincrement(table_name, name),
            )

        return [describe(col) for col in table.columns]

    def get_primary_keys(self, table_name: str) -> list[str]:
        """Return the primary-key column names of ``table_name``.

        Any exception raised during the lookup is logged at debug level and
        an empty list is returned instead.
        """
        try:
            return self._db[table_name].pks or []
        except Exception:
            logger.debug("Failed to get primary keys", table=table_name)
            return []

    def get_foreign_keys(self, table_name: str) -> list[ForeignKeyInfo]:
        """Return the foreign keys declared on ``table_name``.

        Any exception raised during the lookup is logged at debug level and
        an empty list is returned instead.
        """
        found: list[ForeignKeyInfo] = []
        try:
            for fk in self._db[table_name].foreign_keys:
                found.append(
                    ForeignKeyInfo(
                        column=fk.column,
                        ref_table=fk.other_table,
                        ref_column=fk.other_column,
                    )
                )
        except Exception:
            logger.debug("Failed to get foreign keys", table=table_name)
            return []
        return found

    def get_row_count(self, table_name: str) -> int:
        """Return the table's ``count`` attribute converted to ``int``."""
        table = self._db[table_name]
        return int(table.count)

    def get_column_values(self, table_name: str, column_name: str, limit: int = 1000) -> list[Any]:
        """Return up to ``limit`` values of one column.

        Both identifiers go through ``quote_identifier``; ``limit`` is bound
        as a query parameter.
        """
        quoted_table = quote_identifier(table_name)
        quoted_column = quote_identifier(column_name)
        query = f"SELECT {quoted_column} FROM {quoted_table} LIMIT ?"
        fetched = self._db.execute(query, [limit]).fetchall()
        return [record[0] for record in fetched]

    def get_index_info(self, table_name: str) -> list[IndexInfo]:
        """List the indexes of ``table_name``.

        Indexes whose name starts with ``sqlite_autoindex_`` are left out.
        Column names come from ``PRAGMA index_info``; ``None`` entries are
        dropped.
        """
        quoted_table = quote_identifier(table_name)
        index_rows = self._db.execute(f"PRAGMA index_list({quoted_table})").fetchall()
        indexes: list[IndexInfo] = []
        for index_row in index_rows:
            index_name = index_row[1]
            unique = bool(index_row[2])
            if not index_name.startswith("sqlite_autoindex_"):
                members = self._db.execute(f"PRAGMA index_info({quote_identifier(index_name)})").fetchall()
                column_names = [member[2] for member in members if member[2] is not None]
                indexes.append(IndexInfo(name=index_name, table=table_name, columns=column_names, unique=unique))
        return indexes

    def get_sample_rows(self, table_name: str, limit: int = 5) -> list[dict[str, Any]]:
        """Return up to ``limit`` rows as dictionaries keyed by column name.

        The column list is taken from :meth:`get_column_info`, so the keys
        follow that order.
        """
        quoted_table = quote_identifier(table_name)
        column_infos = self.get_column_info(table_name)
        select_list = ", ".join([quote_identifier(info.name) for info in column_infos])
        query = f"SELECT {select_list} FROM {quoted_table} LIMIT ?"
        fetched = self._db.execute(query, [limit]).fetchall()
        keys = [info.name for info in column_infos]
        return [dict(zip(keys, record, strict=False)) for record in fetched]

    def batch_insert(
        self,
        table_name: str,
        data: Iterator[dict[str, Any]],
        batch_size: int = 5000,
    ) -> int:
        """Insert rows from ``data`` in chunks using ``insert_all``.

        Args:
            table_name: Destination table.
            data: Iterator yielding one dict per row.
            batch_size: Number of buffered rows that triggers a flush.

        Returns:
            The total number of rows handed to ``insert_all``.
        """
        total = 0
        pending: list[dict[str, Any]] = []
        for record in data:
            pending.append(record)
            if len(pending) < batch_size:
                continue
            self._db[table_name].insert_all(pending)
            total += len(pending)
            # Start a fresh list rather than clearing the one just handed over.
            pending = []
        if pending:
            # Flush the final, partially filled chunk.
            self._db[table_name].insert_all(pending)
            total += len(pending)
        return total

    def clear_table(self, table_name: str) -> None:
        """Delete every row of ``table_name`` with an unconditional DELETE."""
        quoted_table = quote_identifier(table_name)
        self._db.execute(f"DELETE FROM {quoted_table}")
        logger.debug("Cleared table", table_name=table_name)

    def optimize_for_bulk_write(self, expected_rows: int | None = None) -> None:
        """Ask the optimizer to preserve current settings, then optimize.

        Does nothing when no optimizer has been created yet.
        """
        optimizer = self._optimizer
        if optimizer is None:
            return
        optimizer.preserve()
        optimizer.optimize(expected_rows)

    def restore_settings(self) -> None:
        """Ask the optimizer to restore settings; no-op without an optimizer."""
        optimizer = self._optimizer
        if optimizer is None:
            return
        optimizer.restore()

    def _is_autoincrement(self, table_name: str, column_name: str) -> bool:
        """Delegate autoincrement detection to ``detect_autoincrement``."""
        from sqlseed._utils.schema_helpers import detect_autoincrement

        run_query = self._db.execute
        return detect_autoincrement(run_query, table_name, column_name)

    def _execute_pragma(self, sql: str) -> None:
        """Execute ``sql`` on the database and discard the result."""
        self._db.execute(sql)

    def _fetch_pragma(self, name: str) -> Any:
        """Return the first field of ``PRAGMA <name>``, or ``None`` if no row."""
        first_row = self._db.execute(f"PRAGMA {name}").fetchone()
        if not first_row:
            return None
        return first_row[0]

    def __enter__(self) -> Self:
        """Enter the context manager and return this adapter."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        """Close the adapter when the ``with`` block ends."""
        self.close()
@@ -0,0 +1,11 @@
1
+ from sqlseed.generators._protocol import DataProvider
2
+ from sqlseed.generators.base_provider import BaseProvider
3
+ from sqlseed.generators.registry import ProviderRegistry
4
+ from sqlseed.generators.stream import DataStream
5
+
6
+ __all__ = [
7
+ "BaseProvider",
8
+ "DataProvider",
9
+ "DataStream",
10
+ "ProviderRegistry",
11
+ ]
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Protocol, runtime_checkable
4
+
5
+
6
@runtime_checkable
class DataProvider(Protocol):
    """Structural interface for objects that generate seed values.

    The protocol is runtime-checkable, so ``isinstance`` reports whether an
    object exposes every member listed here. All tuning parameters are
    keyword-only; how each one is interpreted is left to the implementation.
    """

    @property
    def name(self) -> str:
        """Name of the provider, as a string."""

    def set_locale(self, locale: str) -> None:
        """Receive the locale string the provider should use."""

    def set_seed(self, seed: int) -> None:
        """Receive an integer seed."""

    def generate_string(
        self,
        *,
        min_length: int = 1,
        max_length: int = 100,
        charset: str | None = None,
    ) -> str:
        """Produce a string; takes length limits and an optional charset."""

    def generate_integer(self, *, min_value: int = 0, max_value: int = 999999) -> int:
        """Produce an integer; takes ``min_value`` and ``max_value``."""

    def generate_float(
        self,
        *,
        min_value: float = 0.0,
        max_value: float = 999999.0,
        precision: int = 2,
    ) -> float:
        """Produce a float; takes value limits and a ``precision``."""

    def generate_boolean(self) -> bool:
        """Produce a boolean."""

    def generate_bytes(self, *, length: int = 16) -> bytes:
        """Produce a bytes value; takes a ``length``."""

    def generate_name(self) -> str:
        """Produce a name as a string."""

    def generate_first_name(self) -> str:
        """Produce a first name as a string."""

    def generate_last_name(self) -> str:
        """Produce a last name as a string."""

    def generate_email(self) -> str:
        """Produce an email as a string."""

    def generate_phone(self) -> str:
        """Produce a phone value as a string."""

    def generate_address(self) -> str:
        """Produce an address as a string."""

    def generate_company(self) -> str:
        """Produce a company value as a string."""

    def generate_url(self) -> str:
        """Produce a URL as a string."""

    def generate_ipv4(self) -> str:
        """Produce an IPv4 value as a string."""

    def generate_uuid(self) -> str:
        """Produce a UUID as a string."""

    def generate_date(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Produce a date as a string; takes ``start_year`` and optional ``end_year``."""

    def generate_datetime(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Produce a datetime as a string; takes ``start_year`` and optional ``end_year``."""

    def generate_timestamp(self) -> int:
        """Produce a timestamp as an integer."""

    def generate_text(self, *, min_length: int = 50, max_length: int = 200) -> str:
        """Produce text as a string; takes ``min_length`` and ``max_length``."""

    def generate_sentence(self) -> str:
        """Produce a sentence as a string."""

    def generate_password(self, *, length: int = 16) -> str:
        """Produce a password as a string; takes a ``length``."""

    def generate_choice(self, choices: list[Any]) -> Any:
        """Produce a value given the list ``choices``."""

    def generate_json(self, *, schema: dict[str, Any] | None = None) -> str:
        """Produce JSON as a string; takes an optional ``schema`` mapping."""

    def generate_pattern(self, *, regex: str) -> str:
        """Produce a string given a ``regex``."""