thoth-dbmanager 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. thoth_dbmanager/ThothDbManager.py +459 -0
  2. thoth_dbmanager/__init__.py +136 -0
  3. thoth_dbmanager/adapters/__init__.py +21 -0
  4. thoth_dbmanager/adapters/mariadb.py +165 -0
  5. thoth_dbmanager/adapters/mysql.py +165 -0
  6. thoth_dbmanager/adapters/oracle.py +554 -0
  7. thoth_dbmanager/adapters/postgresql.py +444 -0
  8. thoth_dbmanager/adapters/sqlite.py +385 -0
  9. thoth_dbmanager/adapters/sqlserver.py +583 -0
  10. thoth_dbmanager/adapters/supabase.py +249 -0
  11. thoth_dbmanager/core/__init__.py +13 -0
  12. thoth_dbmanager/core/factory.py +272 -0
  13. thoth_dbmanager/core/interfaces.py +271 -0
  14. thoth_dbmanager/core/registry.py +220 -0
  15. thoth_dbmanager/documents.py +155 -0
  16. thoth_dbmanager/dynamic_imports.py +250 -0
  17. thoth_dbmanager/helpers/__init__.py +0 -0
  18. thoth_dbmanager/helpers/multi_db_generator.py +508 -0
  19. thoth_dbmanager/helpers/preprocess_values.py +159 -0
  20. thoth_dbmanager/helpers/schema.py +376 -0
  21. thoth_dbmanager/helpers/search.py +117 -0
  22. thoth_dbmanager/lsh/__init__.py +21 -0
  23. thoth_dbmanager/lsh/core.py +182 -0
  24. thoth_dbmanager/lsh/factory.py +76 -0
  25. thoth_dbmanager/lsh/manager.py +170 -0
  26. thoth_dbmanager/lsh/storage.py +96 -0
  27. thoth_dbmanager/plugins/__init__.py +23 -0
  28. thoth_dbmanager/plugins/mariadb.py +436 -0
  29. thoth_dbmanager/plugins/mysql.py +408 -0
  30. thoth_dbmanager/plugins/oracle.py +150 -0
  31. thoth_dbmanager/plugins/postgresql.py +145 -0
  32. thoth_dbmanager/plugins/sqlite.py +170 -0
  33. thoth_dbmanager/plugins/sqlserver.py +149 -0
  34. thoth_dbmanager/plugins/supabase.py +224 -0
  35. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.1.dist-info}/METADATA +6 -6
  36. thoth_dbmanager-0.4.1.dist-info/RECORD +39 -0
  37. thoth_dbmanager-0.4.1.dist-info/top_level.txt +1 -0
  38. thoth_dbmanager-0.4.0.dist-info/RECORD +0 -5
  39. thoth_dbmanager-0.4.0.dist-info/top_level.txt +0 -1
  40. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.1.dist-info}/WHEEL +0 -0
  41. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,385 @@
1
+ """
2
+ SQLite adapter implementation.
3
+ """
4
+ import logging
5
+ import sqlite3
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Union
8
+ from sqlalchemy import create_engine, text
9
+ from sqlalchemy.exc import SQLAlchemyError
10
+
11
+ from ..core.interfaces import DbAdapter
12
+ from ..documents import (
13
+ TableDocument,
14
+ ColumnDocument,
15
+ SchemaDocument,
16
+ ForeignKeyDocument,
17
+ IndexDocument
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class SQLiteAdapter(DbAdapter):
24
+ """
25
+ SQLite database adapter implementation.
26
+ """
27
+
28
def __init__(self, connection_params: Dict[str, Any]):
    """Create a disconnected adapter.

    Args:
        connection_params: Connection settings, handed unchanged to the
            base adapter's constructor.
    """
    super().__init__(connection_params)

    # All three handles stay None until connect() fills them in.
    self.database_path = None
    self.raw_connection = None
    self.engine = None
33
+
34
def connect(self) -> None:
    """Open the SQLAlchemy engine and a raw ``sqlite3`` connection.

    Reads ``database_path`` from ``self.connection_params``, creates the
    parent directory when it is missing, checks the engine with
    ``SELECT 1`` and opens a second, raw ``sqlite3`` connection whose rows
    allow access by column name. The handles are stored on the instance
    only after every step succeeded, so a failed attempt never leaves a
    half-initialised adapter behind.

    Raises:
        ValueError: If ``database_path`` is missing or empty.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    new_engine = None
    new_raw_connection = None

    try:
        # Get database path
        self.database_path = self.connection_params.get('database_path')
        if not self.database_path:
            raise ValueError("database_path is required for SQLite")

        # Ensure directory exists
        Path(self.database_path).parent.mkdir(parents=True, exist_ok=True)

        # Create SQLAlchemy engine and prove it can really open the file
        new_engine = create_engine(f"sqlite:///{self.database_path}", echo=False)
        with new_engine.connect() as conn:
            conn.execute(text("SELECT 1"))

        # Also create raw sqlite3 connection for specific operations
        new_raw_connection = sqlite3.connect(self.database_path)
        new_raw_connection.row_factory = sqlite3.Row  # Enable column access by name

    except Exception as e:
        # Release whatever was opened before the failure; otherwise the
        # engine pool / file handle would leak on every failed attempt.
        if new_raw_connection is not None:
            new_raw_connection.close()
        if new_engine is not None:
            new_engine.dispose()
        logger.error(f"Failed to connect to SQLite: {e}")
        raise

    self.engine = new_engine
    self.raw_connection = new_raw_connection
    self._initialized = True
    logger.info(f"SQLite connection established successfully: {self.database_path}")
64
+
65
def disconnect(self) -> None:
    """Close the SQLAlchemy engine and the raw ``sqlite3`` connection.

    Both handles are detached from the instance first and then closed
    independently, so an error while disposing the engine can no longer
    prevent the raw connection from being closed or leave the adapter
    flagged as initialised. Errors are logged, never raised.
    """
    engine, self.engine = self.engine, None
    raw_connection, self.raw_connection = self.raw_connection, None
    self._initialized = False

    closed_cleanly = True

    if engine:
        try:
            engine.dispose()
        except Exception as e:
            closed_cleanly = False
            logger.error(f"Error closing SQLite connection: {e}")

    if raw_connection:
        try:
            raw_connection.close()
        except Exception as e:
            closed_cleanly = False
            logger.error(f"Error closing SQLite connection: {e}")

    if closed_cleanly:
        logger.info("SQLite connection closed")
81
+
82
def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
    """Execute a SQL statement on a fresh engine connection.

    Args:
        query: SQL text; named ``:param`` placeholders are bound from
            ``params``.
        params: Optional bind parameters.
        fetch: ``"all"`` (default), ``"one"``, or an integer row count for
            ``fetchmany``; any other value behaves like ``"all"``.
        timeout: Busy timeout in seconds. SQLite has no per-query timeout,
            so this only bounds how long a locked database is waited for.

    Returns:
        The fetched row(s) when the statement produces a result set,
        otherwise the affected row count.

    Raises:
        RuntimeError: If the adapter is not connected.
        ValueError, TypeError: If ``timeout`` is not numeric.
        SQLAlchemyError: Logged and re-raised on any database error.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    # The PRAGMA value is interpolated into the statement text, so force it
    # to a plain integer first (SQLite expects milliseconds).
    busy_timeout_ms = int(float(timeout) * 1000)

    try:
        with self.engine.connect() as conn:
            conn.execute(text(f"PRAGMA busy_timeout = {busy_timeout_ms}"))

            # Execute query
            if params:
                result = conn.execute(text(query), params)
            else:
                result = conn.execute(text(query))

            # Ask the driver whether a result set exists instead of guessing
            # from the leading keyword: a value-setting PRAGMA yields no
            # rows, while "WITH ... INSERT" or "... RETURNING" may.
            if result.returns_rows:
                if fetch == "one":
                    rows = result.fetchone()
                elif fetch != "all" and isinstance(fetch, int):
                    rows = result.fetchmany(fetch)
                else:
                    rows = result.fetchall()

                # Plain reads need no commit; anything else that produced
                # rows may also have written, so persist it.
                if not query.strip().upper().startswith(('SELECT', 'PRAGMA')):
                    conn.commit()
                return rows

            # No result set: commit the change and report the row count
            conn.commit()
            return result.rowcount

    except SQLAlchemyError as e:
        logger.error(f"SQLite query error: {e}")
        raise
116
+
117
def get_tables_as_documents(self) -> List[TableDocument]:
    """List user tables from ``sqlite_master`` as ``TableDocument`` objects.

    Internal ``sqlite_%`` tables are skipped. The table comment is the text
    after the first ``-- `` marker found in the stored CREATE TABLE
    statement, or an empty string when there is none.
    """
    listing_sql = """
        SELECT
        name as table_name,
        sql as table_sql
        FROM sqlite_master
        WHERE type = 'table'
        AND name NOT LIKE 'sqlite_%'
        ORDER BY name
    """

    table_docs = []
    for entry in self.execute_query(listing_sql):
        ddl = entry.table_sql

        # Simple comment extraction: first line carrying a "-- " marker wins
        remark = ""
        if ddl:
            remark = next(
                (
                    ddl_line.partition('-- ')[2].strip()
                    for ddl_line in ddl.split('\n')
                    if '-- ' in ddl_line
                ),
                "",
            )

        table_docs.append(
            TableDocument(
                table_name=entry.table_name,
                schema_name="main",  # SQLite default schema
                comment=remark
            )
        )

    return table_docs
151
+
152
def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
    """Describe the columns of ``table_name`` via ``PRAGMA table_info``.

    Args:
        table_name: Name of the table to inspect.

    Returns:
        One ``ColumnDocument`` per column, in the order SQLite reports
        them. Comments are always empty because SQLite has no native
        column comments.
    """
    # Quote the identifier (doubling embedded quotes) so names containing
    # spaces, hyphens or keywords work and cannot alter the statement.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'
    query = f"PRAGMA table_info({quoted_table})"

    results = self.execute_query(query)
    documents = []

    for row in results:
        doc = ColumnDocument(
            table_name=table_name,
            column_name=row.name,
            data_type=row.type,
            comment="",  # SQLite doesn't support column comments natively
            is_pk=bool(row.pk),
            is_nullable=not bool(row.notnull),
            default_value=row.dflt_value,
            schema_name="main"
        )
        documents.append(doc)

    return documents
174
+
175
def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
    """Collect the foreign keys of every user table.

    Runs ``PRAGMA foreign_key_list`` per table. SQLite does not name
    foreign keys, so ``constraint_name`` is synthesised as
    ``fk_<table>_<id>``. A failure on one table is logged as a warning and
    does not stop the remaining tables from being processed.
    """
    documents = []

    for table_doc in self.get_tables_as_documents():
        table_name = table_doc.table_name

        # Quote the identifier so unusual table names are handled
        quoted_table = '"' + str(table_name).replace('"', '""') + '"'
        query = f"PRAGMA foreign_key_list({quoted_table})"

        try:
            for row in self.execute_query(query):
                # The PRAGMA column is literally called "from" (a Python
                # keyword), so it has to be read by key; "from_" is not a
                # column of the result.
                fields = row._mapping
                fk_id = fields["id"]

                documents.append(
                    ForeignKeyDocument(
                        source_table_name=table_name,
                        source_column_name=fields["from"],
                        target_table_name=fields["table"],
                        target_column_name=fields["to"],
                        constraint_name=f"fk_{table_name}_{fk_id}",  # SQLite doesn't name FKs
                        schema_name="main"
                    )
                )

        except Exception as e:
            logger.warning(f"Could not get foreign keys for table {table_name}: {e}")

    return documents
206
+
207
def get_schemas_as_documents(self) -> List[SchemaDocument]:
    """Return one ``SchemaDocument`` per entry of ``PRAGMA database_list``.

    SQLite's notion of a schema is limited to 'main', 'temp' and attached
    databases; entries without a backing file are described as in-memory.
    """
    return [
        SchemaDocument(
            schema_name=entry.name,
            description=f"SQLite database: {entry.file or 'in-memory'}"
        )
        for entry in self.execute_query("PRAGMA database_list")
    ]
223
+
224
def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
    """Describe the indexes of one table, or of every user table.

    Args:
        table_name: Table to inspect; when omitted or empty, all tables
            returned by ``get_tables_as_documents`` are inspected.

    Returns:
        One ``IndexDocument`` per index. A failure on one table is logged
        as a warning and the remaining tables are still processed.
    """
    if table_name:
        tables = [table_name]
    else:
        tables = [doc.table_name for doc in self.get_tables_as_documents()]

    documents = []

    for table in tables:
        # Quote identifiers so unusual table/index names are handled
        quoted_table = '"' + str(table).replace('"', '""') + '"'

        try:
            for row in self.execute_query(f"PRAGMA index_list({quoted_table})"):
                fields = row._mapping
                index_name = fields["name"]

                # Get index columns
                quoted_index = '"' + str(index_name).replace('"', '""') + '"'
                col_results = self.execute_query(f"PRAGMA index_info({quoted_index})")
                columns = [col_row._mapping["name"] for col_row in col_results]

                # "sqlite_autoindex_*" is created for UNIQUE constraints as
                # well as PRIMARY KEYs; the "origin" column tells them apart
                # ('pk' vs 'u' vs 'c'). Fall back to the name prefix only
                # when the SQLite build does not report an origin.
                if "origin" in fields:
                    is_primary = fields["origin"] == "pk"
                else:
                    is_primary = index_name.startswith('sqlite_autoindex_')

                documents.append(
                    IndexDocument(
                        index_name=index_name,
                        table_name=table,
                        columns=columns,
                        is_unique=bool(fields["unique"]),
                        is_primary=is_primary,
                        index_type="btree",  # SQLite primarily uses B-tree indexes
                        schema_name="main"
                    )
                )

        except Exception as e:
            logger.warning(f"Could not get indexes for table {table}: {e}")

    return documents
265
+
266
def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
    """Collect distinct non-empty values of text columns for every table.

    Returns:
        ``{table_name: {column_name: [values...]}}``. Only columns whose
        declared type is textual are queried (at most 1000 values each, as
        strings); every other column, and any column whose query fails,
        maps to an empty list.
    """
    result: Dict[str, Dict[str, List[str]]] = {}

    for table_doc in self.get_tables_as_documents():
        table_name = table_doc.table_name
        quoted_table = '"' + str(table_name).replace('"', '""') + '"'

        column_values: Dict[str, List[str]] = {}
        result[table_name] = column_values

        for column_doc in self.get_columns_as_documents(table_name):
            column_name = column_doc.column_name

            # Only get unique values for text columns to avoid large datasets.
            # Match by substring so sized or national variants such as
            # VARCHAR(255), NVARCHAR or CLOB count as text too.
            declared_type = (column_doc.data_type or "").upper()
            is_textual = declared_type.startswith("STRING") or any(
                marker in declared_type for marker in ("CHAR", "CLOB", "TEXT")
            )
            if not is_textual:
                column_values[column_name] = []
                continue

            # Double embedded quotes so any identifier is quoted safely
            quoted_column = '"' + str(column_name).replace('"', '""') + '"'
            query = f"""
                SELECT DISTINCT {quoted_column}
                FROM {quoted_table}
                WHERE {quoted_column} IS NOT NULL
                AND LENGTH({quoted_column}) > 0
                ORDER BY {quoted_column}
                LIMIT 1000
            """

            try:
                values = self.execute_query(query)
                column_values[column_name] = [str(row[0]) for row in values if row[0]]
            except Exception as e:
                logger.warning(f"Could not get unique values for {table_name}.{column_name}: {e}")
                column_values[column_name] = []

    return result
306
+
307
def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
    """
    Retrieves the most frequent values for each column in the specified table.

    Args:
        table_name (str): The name of the table.
        number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.

    Returns:
        Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
        All lists are padded with ``None`` to the length of the longest one. An empty
        dictionary is returned when the table is not found, has no columns, or its
        metadata cannot be read.
    """
    # First, verify the table exists
    table_check_query = """
        SELECT name FROM sqlite_master
        WHERE type = 'table' AND name = :table_name
    """

    try:
        if not self.execute_query(table_check_query, {"table_name": table_name}):
            logger.warning(f"Table {table_name} not found")
            return {}
    except Exception as e:
        logger.error(f"Error checking table {table_name}: {e}")
        return {}

    # SQLite uses double quotes for identifier quoting; embedded quotes are
    # doubled so names with spaces, hyphens or quotes stay valid.
    quoted_table_name = '"' + str(table_name).replace('"', '""') + '"'

    # Get column information using PRAGMA
    try:
        columns_result = self.execute_query(f"PRAGMA table_info({quoted_table_name})")
    except Exception as e:
        logger.error(f"Error getting columns for table {table_name}: {e}")
        return {}

    if not columns_result:
        logger.warning(f"No columns found for table {table_name}")
        return {}

    most_frequent_values: Dict[str, List[Any]] = {}

    for row in columns_result:
        column_name = row[1]  # column name is at index 1 in PRAGMA table_info
        quoted_column_name = '"' + str(column_name).replace('"', '""') + '"'

        # Query to get most frequent values
        query_str = f"""
            SELECT {quoted_column_name}
            FROM (
                SELECT {quoted_column_name}, COUNT(*) as _freq
                FROM {quoted_table_name}
                WHERE {quoted_column_name} IS NOT NULL
                GROUP BY {quoted_column_name}
                ORDER BY _freq DESC
                LIMIT :num_rows
            )
        """

        try:
            result = self.execute_query(query_str, {"num_rows": number_of_rows})
            most_frequent_values[column_name] = [value_row[0] for value_row in result]
        except Exception as e:
            logger.error(f"Error fetching frequent values for {column_name} in {table_name}: {e}")
            most_frequent_values[column_name] = []

    # Normalize list lengths so every column has the same number of entries
    max_length = max((len(values) for values in most_frequent_values.values()), default=0)
    for values in most_frequent_values.values():
        values.extend([None] * (max_length - len(values)))

    return most_frequent_values