thoth-dbmanager 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thoth_dbmanager/ThothDbManager.py +459 -0
- thoth_dbmanager/__init__.py +136 -0
- thoth_dbmanager/adapters/__init__.py +21 -0
- thoth_dbmanager/adapters/mariadb.py +165 -0
- thoth_dbmanager/adapters/mysql.py +165 -0
- thoth_dbmanager/adapters/oracle.py +554 -0
- thoth_dbmanager/adapters/postgresql.py +444 -0
- thoth_dbmanager/adapters/sqlite.py +385 -0
- thoth_dbmanager/adapters/sqlserver.py +583 -0
- thoth_dbmanager/adapters/supabase.py +249 -0
- thoth_dbmanager/core/__init__.py +13 -0
- thoth_dbmanager/core/factory.py +272 -0
- thoth_dbmanager/core/interfaces.py +271 -0
- thoth_dbmanager/core/registry.py +220 -0
- thoth_dbmanager/documents.py +155 -0
- thoth_dbmanager/dynamic_imports.py +250 -0
- thoth_dbmanager/helpers/__init__.py +0 -0
- thoth_dbmanager/helpers/multi_db_generator.py +508 -0
- thoth_dbmanager/helpers/preprocess_values.py +159 -0
- thoth_dbmanager/helpers/schema.py +376 -0
- thoth_dbmanager/helpers/search.py +117 -0
- thoth_dbmanager/lsh/__init__.py +21 -0
- thoth_dbmanager/lsh/core.py +182 -0
- thoth_dbmanager/lsh/factory.py +76 -0
- thoth_dbmanager/lsh/manager.py +170 -0
- thoth_dbmanager/lsh/storage.py +96 -0
- thoth_dbmanager/plugins/__init__.py +23 -0
- thoth_dbmanager/plugins/mariadb.py +436 -0
- thoth_dbmanager/plugins/mysql.py +408 -0
- thoth_dbmanager/plugins/oracle.py +150 -0
- thoth_dbmanager/plugins/postgresql.py +145 -0
- thoth_dbmanager/plugins/sqlite.py +170 -0
- thoth_dbmanager/plugins/sqlserver.py +149 -0
- thoth_dbmanager/plugins/supabase.py +224 -0
- {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.1.dist-info}/METADATA +6 -6
- thoth_dbmanager-0.4.1.dist-info/RECORD +39 -0
- thoth_dbmanager-0.4.1.dist-info/top_level.txt +1 -0
- thoth_dbmanager-0.4.0.dist-info/RECORD +0 -5
- thoth_dbmanager-0.4.0.dist-info/top_level.txt +0 -1
- {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.1.dist-info}/WHEEL +0 -0
- {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,385 @@
|
|
1
|
+
"""
|
2
|
+
SQLite adapter implementation.
|
3
|
+
"""
|
4
|
+
import logging
|
5
|
+
import sqlite3
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
8
|
+
from sqlalchemy import create_engine, text
|
9
|
+
from sqlalchemy.exc import SQLAlchemyError
|
10
|
+
|
11
|
+
from ..core.interfaces import DbAdapter
|
12
|
+
from ..documents import (
|
13
|
+
TableDocument,
|
14
|
+
ColumnDocument,
|
15
|
+
SchemaDocument,
|
16
|
+
ForeignKeyDocument,
|
17
|
+
IndexDocument
|
18
|
+
)
|
19
|
+
|
20
|
+
logger = logging.getLogger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
class SQLiteAdapter(DbAdapter):
|
24
|
+
"""
|
25
|
+
SQLite database adapter implementation.
|
26
|
+
"""
|
27
|
+
|
28
|
+
def __init__(self, connection_params: Dict[str, Any]):
    """Create the adapter in a disconnected state.

    Args:
        connection_params: Adapter settings, passed unchanged to the
            ``DbAdapter`` initializer.
    """
    super().__init__(connection_params)
    # No handle is opened here; all three attributes start out unset.
    self.engine = self.raw_connection = self.database_path = None
|
33
|
+
|
34
|
+
def connect(self) -> None:
    """Open the SQLAlchemy engine and the raw ``sqlite3`` connection.

    Reads ``database_path`` from ``self.connection_params``, creates the
    parent directory when it is missing, builds the engine, checks it with
    ``SELECT 1`` and then opens a raw ``sqlite3`` connection whose rows can
    be accessed by column name.

    The new handles are stored on the instance only after every step has
    succeeded; on failure anything opened so far is released again.

    Raises:
        ValueError: If ``database_path`` is missing or empty.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    engine = None
    raw_connection = None

    try:
        # Get database path
        self.database_path = self.connection_params.get('database_path')
        if not self.database_path:
            raise ValueError("database_path is required for SQLite")

        # Ensure directory exists
        Path(self.database_path).parent.mkdir(parents=True, exist_ok=True)

        # Create SQLAlchemy engine
        engine = create_engine(f"sqlite:///{self.database_path}", echo=False)

        # Test connection
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))

        # Also create raw sqlite3 connection for specific operations
        raw_connection = sqlite3.connect(self.database_path)
        raw_connection.row_factory = sqlite3.Row  # Enable column access by name

    except Exception as e:
        # Release partially created handles so a failed connect() does not
        # leak them or leave the adapter looking connected.
        if raw_connection is not None:
            raw_connection.close()
        if engine is not None:
            engine.dispose()
        logger.error(f"Failed to connect to SQLite: {e}")
        raise

    self.engine = engine
    self.raw_connection = raw_connection
    self._initialized = True
    logger.info(f"SQLite connection established successfully: {self.database_path}")
|
64
|
+
|
65
|
+
def disconnect(self) -> None:
    """Close the engine and the raw connection without raising.

    Both handles are detached from the instance and ``_initialized`` is
    cleared before anything is closed, and each handle is closed in its own
    ``try`` block, so a failure while disposing the engine can no longer
    leave the raw connection open or the adapter marked as initialized.
    Errors are logged, not propagated.
    """
    engine, self.engine = self.engine, None
    raw_connection, self.raw_connection = self.raw_connection, None
    self._initialized = False

    closed_cleanly = True

    if engine is not None:
        try:
            engine.dispose()
        except Exception as e:
            closed_cleanly = False
            logger.error(f"Error closing SQLite connection: {e}")

    if raw_connection is not None:
        try:
            raw_connection.close()
        except Exception as e:
            closed_cleanly = False
            logger.error(f"Error closing SQLite connection: {e}")

    if closed_cleanly:
        logger.info("SQLite connection closed")
|
81
|
+
|
82
|
+
def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
    """Execute one SQL statement on a fresh engine connection.

    Args:
        query: SQL text; bind parameters use the ``:name`` style.
        params: Values for the bind parameters, or ``None``/empty for none.
        fetch: ``"all"`` for every row, ``"one"`` for a single row, or an
            integer ``n`` for at most ``n`` rows. Any other value behaves
            like ``"all"``.
        timeout: Seconds to wait on a locked database, applied through
            ``PRAGMA busy_timeout``.

    Returns:
        The fetched rows when the statement produces a result set,
        otherwise the affected-row count.

    Raises:
        RuntimeError: If no engine has been created yet.
        ValueError: If ``timeout`` is not numeric.
        SQLAlchemyError: Logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    # Coerce to an integer so only a number is ever interpolated into the
    # PRAGMA text. SQLite expects milliseconds.
    busy_timeout_ms = int(float(timeout) * 1000)

    try:
        with self.engine.connect() as conn:
            # SQLite doesn't have query timeout, but we can set a connection timeout
            conn.execute(text(f"PRAGMA busy_timeout = {busy_timeout_ms}"))

            # Execute query
            if params:
                result = conn.execute(text(query), params)
            else:
                result = conn.execute(text(query))

            # Ask the result whether it has rows instead of guessing from the
            # leading keyword: "WITH ... INSERT" and "PRAGMA x = y" return
            # none, while EXPLAIN, VALUES, comment-prefixed SELECTs and
            # "... RETURNING" do.
            if not result.returns_rows:
                conn.commit()
                return result.rowcount

            if fetch == "one":
                rows = result.fetchone()
            elif fetch != "all" and isinstance(fetch, int):
                rows = result.fetchmany(fetch)
            else:
                rows = result.fetchall()

            # Persists writes that also return rows; harmless after a read.
            conn.commit()
            return rows

    except SQLAlchemyError as e:
        logger.error(f"SQLite query error: {e}")
        raise
|
116
|
+
|
117
|
+
def get_tables_as_documents(self) -> List[TableDocument]:
    """Return one ``TableDocument`` per user table, ordered by name.

    Tables are read from ``sqlite_master``; names starting with the literal
    prefix ``sqlite_`` are skipped. The document comment is the text after
    the first ``-- `` marker found in the stored ``CREATE TABLE`` statement,
    or an empty string when there is none. Every table is reported under
    the ``main`` schema.
    """
    # '_' is a single-character wildcard in LIKE, so it is escaped here;
    # otherwise a user table such as "sqlite3_cache" would be filtered out.
    query = """
        SELECT
            name as table_name,
            sql as table_sql
        FROM sqlite_master
        WHERE type = 'table'
        AND name NOT LIKE 'sqlite!_%' ESCAPE '!'
        ORDER BY name
    """

    documents = []

    for row in self.execute_query(query):
        # Simple comment extraction - could be improved
        comment = ""
        for line in (row.table_sql or "").split('\n'):
            if '-- ' in line:
                comment = line.split('-- ', 1)[1].strip()
                break

        documents.append(
            TableDocument(
                table_name=row.table_name,
                schema_name="main",  # SQLite default schema
                comment=comment,
            )
        )

    return documents
|
151
|
+
|
152
|
+
def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
    """Return one ``ColumnDocument`` per column of ``table_name``.

    Column metadata comes from ``PRAGMA table_info``: ``name``, ``type``,
    ``pk``, ``notnull`` and ``dflt_value`` are mapped onto the document.
    Comments are always empty and the schema is always ``main``.

    Args:
        table_name: Table to inspect. It is double-quoted (embedded quotes
            doubled) before being placed in the PRAGMA, so names that
            contain spaces, keywords or quotes are handled.
    """
    # PRAGMA arguments cannot be bound as parameters, so quote the identifier.
    quoted_table = '"' + table_name.replace('"', '""') + '"'
    results = self.execute_query(f"PRAGMA table_info({quoted_table})")

    documents = []

    for row in results:
        documents.append(
            ColumnDocument(
                table_name=table_name,
                column_name=row.name,
                data_type=row.type,
                comment="",  # SQLite doesn't support column comments natively
                is_pk=bool(row.pk),
                is_nullable=not bool(row.notnull),
                default_value=row.dflt_value,
                schema_name="main",
            )
        )

    return documents
|
174
|
+
|
175
|
+
def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
    """Return one ``ForeignKeyDocument`` per foreign-key column of every table.

    For each table from ``get_tables_as_documents()`` the method runs
    ``PRAGMA foreign_key_list`` and maps its ``from``, ``table``, ``to`` and
    ``id`` columns onto a document. Because the constraint name is
    synthesized as ``fk_<table>_<id>``, all columns of a multi-column key
    share one name. A failure for one table is logged as a warning and the
    remaining tables are still processed.
    """
    documents = []

    for table_doc in self.get_tables_as_documents():
        table_name = table_doc.table_name

        # PRAGMA arguments cannot be bound as parameters, so quote the identifier.
        quoted_table = '"' + table_name.replace('"', '""') + '"'
        query = f"PRAGMA foreign_key_list({quoted_table})"

        try:
            for row in self.execute_query(query):
                # The source column is literally named "from", a Python
                # keyword, so it cannot be read as an attribute; use the
                # row's mapping view for all fields.
                fk = row._mapping
                documents.append(
                    ForeignKeyDocument(
                        source_table_name=table_name,
                        source_column_name=fk["from"],
                        target_table_name=fk["table"],
                        target_column_name=fk["to"],
                        constraint_name=f"fk_{table_name}_{fk['id']}",  # SQLite doesn't name FKs
                        schema_name="main",
                    )
                )

        except Exception as e:
            logger.warning(f"Could not get foreign keys for table {table_name}: {e}")

    return documents
|
206
|
+
|
207
|
+
def get_schemas_as_documents(self) -> List[SchemaDocument]:
    """Return one ``SchemaDocument`` per row of ``PRAGMA database_list``.

    The ``name`` column becomes the schema name; the ``file`` column is
    embedded in the description, with ``in-memory`` substituted when it is
    empty.
    """
    # SQLite has limited schema support, mainly 'main', 'temp', and attached databases
    database_rows = self.execute_query("PRAGMA database_list")

    return [
        SchemaDocument(
            schema_name=entry.name,
            description=f"SQLite database: {entry.file or 'in-memory'}"
        )
        for entry in database_rows
    ]
|
223
|
+
|
224
|
+
def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
    """Return one ``IndexDocument`` per index.

    Args:
        table_name: Restrict the result to this table. When omitted or
            empty, every table from ``get_tables_as_documents()`` is
            inspected.

    Returns:
        Documents built from ``PRAGMA index_list`` (name, uniqueness,
        origin) and ``PRAGMA index_info`` (column names). A failure for one
        table is logged as a warning and the other tables are still
        processed.
    """
    documents = []

    if table_name:
        tables = [table_name]
    else:
        tables = [doc.table_name for doc in self.get_tables_as_documents()]

    for table in tables:
        # PRAGMA arguments cannot be bound as parameters, so quote identifiers.
        quoted_table = '"' + table.replace('"', '""') + '"'

        try:
            for row in self.execute_query(f"PRAGMA index_list({quoted_table})"):
                index_name = row.name

                # Get index columns
                quoted_index = '"' + index_name.replace('"', '""') + '"'
                col_results = self.execute_query(f"PRAGMA index_info({quoted_index})")
                columns = [col_row.name for col_row in col_results]

                # Automatic indexes back UNIQUE constraints as well as
                # primary keys, so prefer the "origin" column ('pk' marks a
                # primary key). Fall back to the name prefix only when the
                # column is absent (older SQLite versions).
                origin = row._mapping.get("origin")
                if origin is not None:
                    is_primary = origin == "pk"
                else:
                    is_primary = index_name.startswith('sqlite_autoindex_')

                documents.append(
                    IndexDocument(
                        index_name=index_name,
                        table_name=table,
                        columns=columns,
                        is_unique=bool(row.unique),
                        is_primary=is_primary,
                        index_type="btree",  # SQLite primarily uses B-tree indexes
                        schema_name="main",
                    )
                )

        except Exception as e:
            logger.warning(f"Could not get indexes for table {table}: {e}")

    return documents
|
265
|
+
|
266
|
+
def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
    """Collect distinct non-empty values of text columns for every table.

    Returns:
        ``{table_name: {column_name: [values]}}``. Text columns (declared
        type ``TEXT``, ``VARCHAR``, ``CHAR`` or ``STRING``, with or without
        a length such as ``VARCHAR(255)``) list up to 1000 distinct values
        as strings, sorted by the database. All other columns, and columns
        whose query fails, map to an empty list.
    """
    text_types = {'TEXT', 'VARCHAR', 'CHAR', 'STRING'}
    result = {}

    for table_doc in self.get_tables_as_documents():
        table_name = table_doc.table_name
        columns = self.get_columns_as_documents(table_name)

        # Double any embedded quote so the name stays a single identifier.
        quoted_table = '"' + table_name.replace('"', '""') + '"'
        column_values = {}
        result[table_name] = column_values

        for column_doc in columns:
            column_name = column_doc.column_name

            # Drop a length/precision suffix ("VARCHAR(255)" -> "VARCHAR")
            # and tolerate a missing declared type.
            base_type = (column_doc.data_type or "").split('(', 1)[0].strip().upper()

            # Only get unique values for text columns to avoid large datasets
            if base_type not in text_types:
                column_values[column_name] = []
                continue

            quoted_column = '"' + column_name.replace('"', '""') + '"'
            try:
                query = f"""
                    SELECT DISTINCT {quoted_column}
                    FROM {quoted_table}
                    WHERE {quoted_column} IS NOT NULL
                    AND LENGTH({quoted_column}) > 0
                    ORDER BY {quoted_column}
                    LIMIT 1000
                """

                values = self.execute_query(query)
                column_values[column_name] = [str(row[0]) for row in values if row[0]]

            except Exception as e:
                logger.warning(f"Could not get unique values for {table_name}.{column_name}: {e}")
                column_values[column_name] = []

    return result
|
306
|
+
|
307
|
+
def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
    """
    Retrieves the most frequent values for each column in the specified table.

    The table must exist in ``sqlite_master`` with type ``table``. NULLs are
    ignored when counting. Lists shorter than the longest one are padded
    with ``None`` so every column has the same number of entries.

    Args:
        table_name (str): The name of the table.
        number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.

    Returns:
        Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
        An empty dict is returned when the table is missing, has no columns,
        or its metadata cannot be read; a column whose query fails maps to
        a list of ``None`` padding only.
    """
    # First, verify the table exists
    table_check_query = """
        SELECT name FROM sqlite_master
        WHERE type = 'table' AND name = :table_name
    """

    try:
        if not self.execute_query(table_check_query, {"table_name": table_name}):
            logger.warning(f"Table {table_name} not found")
            return {}
    except Exception as e:
        logger.error(f"Error checking table {table_name}: {e}")
        return {}

    # SQLite uses double quotes for identifier quoting; embedded quotes are
    # doubled so names with spaces, keywords or quotes stay one identifier.
    quoted_table_name = '"' + table_name.replace('"', '""') + '"'

    # Get column information using PRAGMA
    try:
        columns_result = self.execute_query(f"PRAGMA table_info({quoted_table_name})")
    except Exception as e:
        logger.error(f"Error getting columns for table {table_name}: {e}")
        return {}

    if not columns_result:
        logger.warning(f"No columns found for table {table_name}")
        return {}

    most_frequent_values: Dict[str, List[Any]] = {}

    for row in columns_result:
        column_name = row[1]  # column name is at index 1 in PRAGMA table_info
        quoted_column_name = '"' + column_name.replace('"', '""') + '"'

        # Query to get most frequent values
        query_str = f"""
            SELECT {quoted_column_name}
            FROM (
                SELECT {quoted_column_name}, COUNT(*) as _freq
                FROM {quoted_table_name}
                WHERE {quoted_column_name} IS NOT NULL
                GROUP BY {quoted_column_name}
                ORDER BY _freq DESC
                LIMIT :num_rows
            )
        """

        try:
            result = self.execute_query(query_str, {"num_rows": number_of_rows})
            most_frequent_values[column_name] = [value_row[0] for value_row in result]
        except Exception as e:
            logger.error(f"Error fetching frequent values for {column_name} in {table_name}: {e}")
            most_frequent_values[column_name] = []

    # Normalize list lengths
    max_length = max((len(values) for values in most_frequent_values.values()), default=0)
    for values in most_frequent_values.values():
        values.extend([None] * (max_length - len(values)))

    return most_frequent_values
|