sqlseed 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlseed/__init__.py +121 -0
- sqlseed/_utils/__init__.py +11 -0
- sqlseed/_utils/logger.py +30 -0
- sqlseed/_utils/metrics.py +45 -0
- sqlseed/_utils/progress.py +14 -0
- sqlseed/_utils/schema_helpers.py +51 -0
- sqlseed/_utils/sql_safe.py +45 -0
- sqlseed/_version.py +1 -0
- sqlseed/cli/__init__.py +3 -0
- sqlseed/cli/main.py +316 -0
- sqlseed/config/__init__.py +14 -0
- sqlseed/config/loader.py +66 -0
- sqlseed/config/models.py +99 -0
- sqlseed/config/snapshot.py +91 -0
- sqlseed/core/__init__.py +14 -0
- sqlseed/core/column_dag.py +108 -0
- sqlseed/core/constraints.py +116 -0
- sqlseed/core/expression.py +71 -0
- sqlseed/core/mapper.py +257 -0
- sqlseed/core/orchestrator.py +578 -0
- sqlseed/core/relation.py +124 -0
- sqlseed/core/result.py +23 -0
- sqlseed/core/schema.py +97 -0
- sqlseed/core/transform.py +27 -0
- sqlseed/database/__init__.py +14 -0
- sqlseed/database/_protocol.py +72 -0
- sqlseed/database/optimizer.py +96 -0
- sqlseed/database/raw_sqlite_adapter.py +197 -0
- sqlseed/database/sqlite_utils_adapter.py +183 -0
- sqlseed/generators/__init__.py +11 -0
- sqlseed/generators/_protocol.py +73 -0
- sqlseed/generators/base_provider.py +448 -0
- sqlseed/generators/faker_provider.py +157 -0
- sqlseed/generators/mimesis_provider.py +203 -0
- sqlseed/generators/registry.py +86 -0
- sqlseed/generators/stream.py +157 -0
- sqlseed/py.typed +0 -0
- sqlseed-0.1.0.dist-info/METADATA +934 -0
- sqlseed-0.1.0.dist-info/RECORD +42 -0
- sqlseed-0.1.0.dist-info/WHEEL +4 -0
- sqlseed-0.1.0.dist-info/entry_points.txt +6 -0
- sqlseed-0.1.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from sqlseed._utils.logger import get_logger
|
|
8
|
+
from sqlseed._utils.sql_safe import quote_identifier
|
|
9
|
+
from sqlseed.database._protocol import ColumnInfo, ForeignKeyInfo, IndexInfo
|
|
10
|
+
from sqlseed.database.optimizer import PragmaOptimizer
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterator
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SQLiteUtilsAdapter:
    """Database adapter that reaches SQLite through the ``sqlite-utils`` library.

    The adapter starts unconnected (``_db`` is ``None``); call :meth:`connect`
    before any schema or data method. It can be used as a context manager, in
    which case :meth:`close` is called on exit (``__enter__`` does not connect).
    """

    def __init__(self) -> None:
        """Create an adapter with no open database."""
        self._db: Any = None
        self._optimizer: PragmaOptimizer | None = None
        self._db_path: str = ""

    def connect(self, db_path: str) -> None:
        """Open ``db_path`` with ``sqlite_utils.Database`` and set up the PRAGMA optimizer.

        Args:
            db_path: Path handed unchanged to ``sqlite_utils.Database``.
        """
        # Imported lazily so the module can be imported without sqlite-utils installed.
        import sqlite_utils

        self._db_path = db_path
        self._db = sqlite_utils.Database(db_path)
        self._optimizer = PragmaOptimizer(
            execute_fn=self._execute_pragma,
            fetch_pragma_fn=self._fetch_pragma,
        )
        logger.debug("Connected to database via sqlite-utils", db_path=db_path)

    def close(self) -> None:
        """Close the database if one is open; calling it again is a no-op."""
        if self._db is not None:
            self._db.close()
            self._db = None
            logger.debug("Closed sqlite-utils connection", db_path=self._db_path)

    def get_table_names(self) -> list[str]:
        """Return the table names reported by the database as a list."""
        return list(self._db.table_names())

    def get_column_info(self, table_name: str) -> list[ColumnInfo]:
        """Describe every column of ``table_name``.

        Args:
            table_name: Table to inspect.

        Returns:
            One ``ColumnInfo`` per column, in the order the table reports them.
        """
        table = self._db[table_name]
        pks = self.get_primary_keys(table_name)
        fks = {fk.column for fk in self.get_foreign_keys(table_name)}

        result: list[ColumnInfo] = []
        for col in table.columns:
            col_name = col.name
            is_pk = col_name in pks
            # Autoincrement detection is only attempted for primary-key columns.
            is_autoincrement = is_pk and self._is_autoincrement(table_name, col_name)
            # Primary-key and foreign-key columns are always reported as
            # non-nullable, regardless of their declared NOT NULL flag.
            nullable = not is_pk and col_name not in fks and not col.notnull

            default = col.default_value

            result.append(
                ColumnInfo(
                    name=col_name,
                    type=col.type if isinstance(col.type, str) else str(col.type),
                    nullable=nullable,
                    default=default,
                    is_primary_key=is_pk,
                    is_autoincrement=is_autoincrement,
                )
            )
        return result

    def get_primary_keys(self, table_name: str) -> list[str]:
        """Return the primary-key column names of ``table_name``.

        This is a best-effort lookup: any exception is logged at debug level
        and an empty list is returned instead of propagating the error.
        """
        try:
            table = self._db[table_name]
            pks = table.pks
            return pks if pks else []
        except Exception as exc:
            # Keep the failure reason in the log so an empty result can be diagnosed.
            logger.debug("Failed to get primary keys", table=table_name, error=str(exc))
            return []

    def get_foreign_keys(self, table_name: str) -> list[ForeignKeyInfo]:
        """Return the foreign keys declared on ``table_name``.

        This is a best-effort lookup: any exception is logged at debug level
        and an empty list is returned instead of propagating the error.
        """
        try:
            table = self._db[table_name]
            fks = table.foreign_keys
            return [
                ForeignKeyInfo(
                    column=fk.column,
                    ref_table=fk.other_table,
                    ref_column=fk.other_column,
                )
                for fk in fks
            ]
        except Exception as exc:
            # Keep the failure reason in the log so an empty result can be diagnosed.
            logger.debug("Failed to get foreign keys", table=table_name, error=str(exc))
            return []

    def get_row_count(self, table_name: str) -> int:
        """Return the table's ``count`` attribute converted to ``int``."""
        return int(self._db[table_name].count)

    def get_column_values(self, table_name: str, column_name: str, limit: int = 1000) -> list[Any]:
        """Return up to ``limit`` values of one column.

        Args:
            table_name: Table to read from.
            column_name: Column whose values are returned.
            limit: Bound passed to the SQL ``LIMIT`` clause as a parameter.

        Returns:
            The first field of every fetched row, in fetch order.
        """
        # Identifiers cannot be bound as parameters, so they are quoted instead.
        safe_table = quote_identifier(table_name)
        safe_column = quote_identifier(column_name)
        sql = f"SELECT {safe_column} FROM {safe_table} LIMIT ?"
        rows = self._db.execute(sql, [limit]).fetchall()
        return [row[0] for row in rows]

    def get_index_info(self, table_name: str) -> list[IndexInfo]:
        """List the indexes of ``table_name`` using ``PRAGMA index_list`` / ``index_info``.

        Indexes whose name starts with ``sqlite_autoindex_`` are skipped, and
        index entries whose column name is ``None`` are left out of ``columns``.
        """
        safe_table = quote_identifier(table_name)
        rows = self._db.execute(f"PRAGMA index_list({safe_table})").fetchall()
        result: list[IndexInfo] = []
        for row in rows:
            # Field 1 is read as the index name and field 2 as the unique flag.
            idx_name = row[1]
            is_unique = bool(row[2])
            if idx_name.startswith("sqlite_autoindex_"):
                continue
            col_rows = self._db.execute(f"PRAGMA index_info({quote_identifier(idx_name)})").fetchall()
            columns = [cr[2] for cr in col_rows if cr[2] is not None]
            result.append(IndexInfo(name=idx_name, table=table_name, columns=columns, unique=is_unique))
        return result

    def get_sample_rows(self, table_name: str, limit: int = 5) -> list[dict[str, Any]]:
        """Return up to ``limit`` rows of ``table_name`` as column-name -> value dicts.

        Columns are selected explicitly, in the order given by
        :meth:`get_column_info`, so each row can be zipped with that name list.
        """
        safe_table = quote_identifier(table_name)
        columns = self.get_column_info(table_name)
        col_names = [quote_identifier(c.name) for c in columns]
        cols_sql = ", ".join(col_names)
        sql = f"SELECT {cols_sql} FROM {safe_table} LIMIT ?"
        rows = self._db.execute(sql, [limit]).fetchall()
        col_name_list = [c.name for c in columns]
        return [dict(zip(col_name_list, row, strict=False)) for row in rows]

    def batch_insert(
        self,
        table_name: str,
        data: Iterator[dict[str, Any]],
        batch_size: int = 5000,
    ) -> int:
        """Insert rows from ``data`` in chunks via the table's ``insert_all``.

        Args:
            table_name: Destination table.
            data: Iterator of row dicts; it is consumed lazily, one chunk at a time.
            batch_size: Number of rows collected before each ``insert_all`` call.

        Returns:
            Total number of rows handed to ``insert_all``.
        """
        inserted = 0
        batch: list[dict[str, Any]] = []
        for row in data:
            batch.append(row)
            if len(batch) >= batch_size:
                self._db[table_name].insert_all(batch)
                inserted += len(batch)
                # Start a fresh list rather than clearing the one just handed over.
                batch = []
        if batch:
            # Flush the final, partially filled chunk.
            self._db[table_name].insert_all(batch)
            inserted += len(batch)
        return inserted

    def clear_table(self, table_name: str) -> None:
        """Delete every row of ``table_name`` and commit the deletion."""
        safe_table = quote_identifier(table_name)
        # A DELETE issued through ``execute`` opens an implicit transaction under
        # sqlite3's default transaction handling and is lost if the connection
        # is closed before a commit. Using the connection as a context manager
        # commits on success and rolls back on error.
        with self._db.conn:
            self._db.execute(f"DELETE FROM {safe_table}")
        logger.debug("Cleared table", table_name=table_name)

    def optimize_for_bulk_write(self, expected_rows: int | None = None) -> None:
        """Ask the optimizer to preserve its state, then optimize for ``expected_rows``.

        Does nothing when no optimizer exists (i.e. before :meth:`connect`).
        """
        if self._optimizer is not None:
            self._optimizer.preserve()
            self._optimizer.optimize(expected_rows)

    def restore_settings(self) -> None:
        """Ask the optimizer to restore; does nothing when no optimizer exists."""
        if self._optimizer is not None:
            self._optimizer.restore()

    def _is_autoincrement(self, table_name: str, column_name: str) -> bool:
        """Delegate autoincrement detection to ``detect_autoincrement``."""
        from sqlseed._utils.schema_helpers import detect_autoincrement

        return detect_autoincrement(self._db.execute, table_name, column_name)

    def _execute_pragma(self, sql: str) -> None:
        """Execute ``sql`` on the database, discarding any result (optimizer callback)."""
        self._db.execute(sql)

    def _fetch_pragma(self, name: str) -> Any:
        """Return the first field of ``PRAGMA <name>``, or ``None`` when no row comes back."""
        result = self._db.execute(f"PRAGMA {name}").fetchone()
        return result[0] if result else None

    def __enter__(self) -> Self:
        """Return the adapter itself; no connection is opened here."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        """Close the adapter on leaving the ``with`` block; exceptions are not suppressed."""
        self.close()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from sqlseed.generators._protocol import DataProvider
|
|
2
|
+
from sqlseed.generators.base_provider import BaseProvider
|
|
3
|
+
from sqlseed.generators.registry import ProviderRegistry
|
|
4
|
+
from sqlseed.generators.stream import DataStream
|
|
5
|
+
|
|
6
|
+
# Public names exported by a star-import of this module.
__all__ = [
    "BaseProvider",
    "DataProvider",
    "DataStream",
    "ProviderRegistry",
]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Protocol, runtime_checkable
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@runtime_checkable
class DataProvider(Protocol):
    """Structural interface for objects that generate data values.

    The protocol is ``runtime_checkable``, so ``isinstance(obj, DataProvider)``
    can be used to check that an object exposes these members; that runtime
    check looks at member presence only, not at signatures. How each argument
    is interpreted is up to the implementing provider.
    """

    @property
    def name(self) -> str:
        """Name of the provider."""

    def set_locale(self, locale: str) -> None:
        """Set the provider's locale."""

    def set_seed(self, seed: int) -> None:
        """Set the provider's seed."""

    def generate_string(self, *, min_length: int = 1, max_length: int = 100, charset: str | None = None) -> str:
        """Generate a string; all options are keyword-only."""

    def generate_integer(self, *, min_value: int = 0, max_value: int = 999999) -> int:
        """Generate an integer; ``min_value`` and ``max_value`` are keyword-only."""

    def generate_float(self, *, min_value: float = 0.0, max_value: float = 999999.0, precision: int = 2) -> float:
        """Generate a float; all options are keyword-only."""

    def generate_boolean(self) -> bool:
        """Generate a boolean."""

    def generate_bytes(self, *, length: int = 16) -> bytes:
        """Generate a bytes value; ``length`` is keyword-only."""

    def generate_name(self) -> str:
        """Generate a name."""

    def generate_first_name(self) -> str:
        """Generate a first name."""

    def generate_last_name(self) -> str:
        """Generate a last name."""

    def generate_email(self) -> str:
        """Generate an email value."""

    def generate_phone(self) -> str:
        """Generate a phone value."""

    def generate_address(self) -> str:
        """Generate an address."""

    def generate_company(self) -> str:
        """Generate a company value."""

    def generate_url(self) -> str:
        """Generate a URL."""

    def generate_ipv4(self) -> str:
        """Generate an IPv4 value as a string."""

    def generate_uuid(self) -> str:
        """Generate a UUID as a string."""

    def generate_date(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Generate a date as a string; the year options are keyword-only."""

    def generate_datetime(self, *, start_year: int = 2000, end_year: int | None = None) -> str:
        """Generate a datetime as a string; the year options are keyword-only."""

    def generate_timestamp(self) -> int:
        """Generate a timestamp as an integer."""

    def generate_text(self, *, min_length: int = 50, max_length: int = 200) -> str:
        """Generate text; ``min_length`` and ``max_length`` are keyword-only."""

    def generate_sentence(self) -> str:
        """Generate a sentence."""

    def generate_password(self, *, length: int = 16) -> str:
        """Generate a password; ``length`` is keyword-only."""

    def generate_choice(self, choices: list[Any]) -> Any:
        """Generate a value given the list ``choices``."""

    def generate_json(self, *, schema: dict[str, Any] | None = None) -> str:
        """Generate JSON as a string; the optional ``schema`` is keyword-only."""

    def generate_pattern(self, *, regex: str) -> str:
        """Generate a string for the keyword-only ``regex`` argument."""
|