thoth-dbmanager 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. thoth_dbmanager/ThothDbManager.py +459 -0
  2. thoth_dbmanager/__init__.py +136 -0
  3. thoth_dbmanager/adapters/__init__.py +21 -0
  4. thoth_dbmanager/adapters/mariadb.py +165 -0
  5. thoth_dbmanager/adapters/mysql.py +165 -0
  6. thoth_dbmanager/adapters/oracle.py +554 -0
  7. thoth_dbmanager/adapters/postgresql.py +444 -0
  8. thoth_dbmanager/adapters/qdrant.py +189 -0
  9. thoth_dbmanager/adapters/sqlite.py +385 -0
  10. thoth_dbmanager/adapters/sqlserver.py +583 -0
  11. thoth_dbmanager/adapters/supabase.py +249 -0
  12. thoth_dbmanager/core/__init__.py +13 -0
  13. thoth_dbmanager/core/factory.py +272 -0
  14. thoth_dbmanager/core/interfaces.py +271 -0
  15. thoth_dbmanager/core/registry.py +220 -0
  16. thoth_dbmanager/documents.py +155 -0
  17. thoth_dbmanager/dynamic_imports.py +250 -0
  18. thoth_dbmanager/helpers/__init__.py +0 -0
  19. thoth_dbmanager/helpers/multi_db_generator.py +508 -0
  20. thoth_dbmanager/helpers/preprocess_values.py +159 -0
  21. thoth_dbmanager/helpers/schema.py +376 -0
  22. thoth_dbmanager/helpers/search.py +117 -0
  23. thoth_dbmanager/lsh/__init__.py +21 -0
  24. thoth_dbmanager/lsh/core.py +182 -0
  25. thoth_dbmanager/lsh/factory.py +76 -0
  26. thoth_dbmanager/lsh/manager.py +170 -0
  27. thoth_dbmanager/lsh/storage.py +96 -0
  28. thoth_dbmanager/plugins/__init__.py +23 -0
  29. thoth_dbmanager/plugins/mariadb.py +436 -0
  30. thoth_dbmanager/plugins/mysql.py +408 -0
  31. thoth_dbmanager/plugins/oracle.py +150 -0
  32. thoth_dbmanager/plugins/postgresql.py +145 -0
  33. thoth_dbmanager/plugins/qdrant.py +41 -0
  34. thoth_dbmanager/plugins/sqlite.py +170 -0
  35. thoth_dbmanager/plugins/sqlserver.py +149 -0
  36. thoth_dbmanager/plugins/supabase.py +224 -0
  37. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.2.dist-info}/METADATA +9 -6
  38. thoth_dbmanager-0.4.2.dist-info/RECORD +41 -0
  39. thoth_dbmanager-0.4.2.dist-info/top_level.txt +1 -0
  40. thoth_dbmanager-0.4.0.dist-info/RECORD +0 -5
  41. thoth_dbmanager-0.4.0.dist-info/top_level.txt +0 -1
  42. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.2.dist-info}/WHEEL +0 -0
  43. {thoth_dbmanager-0.4.0.dist-info → thoth_dbmanager-0.4.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,436 @@
1
+ """
2
+ MariaDB plugin for Thoth SQL Database Manager.
3
+ Unified implementation combining plugin architecture with full database functionality.
4
+ """
5
+
6
+ import logging
7
+ import os
8
+ from pathlib import Path
9
+ from threading import Lock
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ from sqlalchemy import create_engine, inspect, text
13
+ from sqlalchemy.exc import SQLAlchemyError
14
+
15
+ from ..core.interfaces import DbPlugin, DbAdapter
16
+ from ..core.registry import register_plugin
17
+ from ..documents import TableDocument, ColumnDocument, ForeignKeyDocument, SchemaDocument, IndexDocument
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class MariaDBAdapter(DbAdapter):
    """MariaDB database adapter built on a SQLAlchemy engine.

    The adapter stores the connection parameters it is given, creates the
    engine lazily, and exposes query execution plus schema reflection
    (tables, columns, foreign keys, indexes) as document objects.
    """

    # Statements starting with one of these keywords are treated as
    # row-returning; anything else is committed and its rowcount returned.
    _ROW_RETURNING_PREFIXES = ('SELECT', 'SHOW', 'DESCRIBE', 'DESC', 'EXPLAIN', 'WITH')

    def __init__(self, connection_params: Dict[str, Any]):
        """Store connection parameters; no connection is opened here.

        Args:
            connection_params: Mapping read for ``host``, ``port`` (defaults
                to 3306), ``database`` or ``dbname``, ``user`` or
                ``username``, and ``password``.
        """
        super().__init__(connection_params)
        self.engine = None
        self.host = connection_params.get('host')
        self.port = connection_params.get('port', 3306)
        self.dbname = connection_params.get('database') or connection_params.get('dbname')
        self.user = connection_params.get('user') or connection_params.get('username')
        self.password = connection_params.get('password')

    def connect(self) -> None:
        """Establish the database connection, trying several drivers in turn.

        Drivers are attempted in this order: ``mysql+pymysql``,
        ``mysql+mysqlconnector``, ``mariadb+mariadbconnector``. The first one
        that can run ``SELECT 1`` is kept.

        Raises:
            ConnectionError: If every driver fails; the last driver error is
                attached as the cause.
        """
        from urllib.parse import quote

        # Percent-encode credentials so characters such as '@', ':' or '/'
        # cannot be mistaken for URL delimiters when the URL is parsed.
        user = quote(str(self.user), safe="")
        password = quote(str(self.password), safe="")
        location = f"{user}:{password}@{self.host}:{self.port}/{self.dbname}"

        connection_methods = [
            # Use MySQL driver (MariaDB is MySQL-compatible)
            f"mysql+pymysql://{location}",
            # Use MySQL connector with explicit TCP
            f"mysql+mysqlconnector://{location}",
            # Use MariaDB connector with TCP parameters
            f"mariadb+mariadbconnector://{location}?unix_socket=",
        ]

        last_error = None
        for connection_string in connection_methods:
            # Only the driver name is ever logged, never the credentials.
            driver = connection_string.split('://')[0]
            try:
                self.engine = create_engine(connection_string, pool_pre_ping=True)
                # Test the connection
                with self.engine.connect() as conn:
                    conn.execute(text("SELECT 1"))
            except Exception as e:
                # Best-effort fallback: a missing driver or a refused
                # connection simply moves on to the next candidate.
                last_error = e
                logger.debug(f"MariaDB connection failed with {driver}: {e}")
                if self.engine:
                    self.engine.dispose()
                    self.engine = None
                continue

            self.connection = self.engine
            self._initialized = True
            logger.info(f"MariaDB connected using: {driver}")
            return

        # All methods failed: surface the last error once, with its cause.
        raise ConnectionError(f"Failed to connect to MariaDB: {last_error}") from last_error

    def disconnect(self) -> None:
        """Dispose of the engine (if any) and clear the connection handle."""
        if self.engine:
            self.engine.dispose()
            self.engine = None
            self.connection = None

    def _ensure_engine(self):
        """Return the engine, connecting first if none exists yet."""
        if not self.engine:
            self.connect()
        return self.engine

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Backtick-quote an identifier, doubling any embedded backticks."""
        return "`" + str(identifier).replace("`", "``") + "`"

    def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
        """Execute a query and return results.

        Args:
            query: SQL text to execute.
            params: Optional bound parameters for the statement.
            fetch: ``"all"`` for every row, ``"one"`` for a single row (or
                ``None`` when there is none), or a positive integer for at
                most that many rows. Any other value behaves like ``"all"``.
            timeout: Accepted for interface compatibility; this
                implementation does not apply it.

        Returns:
            For statements starting with SELECT, SHOW, DESCRIBE, DESC,
            EXPLAIN or WITH: rows as dictionaries (a list, or a single
            dict / ``None`` for ``fetch="one"``). For any other statement the
            transaction is committed and the affected row count is returned.

        Raises:
            SQLAlchemyError: Logged and re-raised when execution fails.
        """
        engine = self._ensure_engine()

        with engine.connect() as connection:
            try:
                if params:
                    result = connection.execute(text(query), params)
                else:
                    result = connection.execute(text(query))

                # Check if this is a query that returns rows (SELECT, SHOW, etc.)
                if query.strip().upper().startswith(self._ROW_RETURNING_PREFIXES):
                    if fetch == "one":
                        row = result.fetchone()
                        return row._asdict() if row is not None else None
                    if isinstance(fetch, int) and fetch > 0:
                        return [row._asdict() for row in result.fetchmany(fetch)]
                    # "all" and any unrecognised fetch value
                    return [row._asdict() for row in result.fetchall()]

                # For DDL/DML queries (CREATE, INSERT, UPDATE, DELETE), return rowcount
                connection.commit()
                return result.rowcount
            except SQLAlchemyError as e:
                logger.error(f"Error executing SQL: {str(e)}")
                raise

    def get_tables_as_documents(self) -> List[TableDocument]:
        """Return one ``TableDocument`` per table reported by the inspector.

        A table whose comment cannot be reflected gets an empty comment.
        """
        inspector = inspect(self._ensure_engine())
        tables = []

        for table_name in inspector.get_table_names():
            try:
                table_comment = inspector.get_table_comment(table_name).get('text', '')
            except SQLAlchemyError:
                table_comment = ''

            tables.append(TableDocument(
                table_name=table_name,
                schema_name="",  # MariaDB doesn't have explicit schemas like PostgreSQL
                comment=table_comment or "",
                row_count=None  # Could be populated if needed
            ))

        return tables

    def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
        """Return one ``ColumnDocument`` per column of ``table_name``.

        Primary-key membership is taken from the table's reflected PK
        constraint.
        """
        inspector = inspect(self._ensure_engine())
        columns_metadata = inspector.get_columns(table_name)
        pk_columns = inspector.get_pk_constraint(table_name).get('constrained_columns', [])

        return [
            ColumnDocument(
                table_name=table_name,
                column_name=col_meta['name'],
                data_type=str(col_meta['type']),
                is_nullable=col_meta.get('nullable', True),
                is_pk=col_meta['name'] in pk_columns,
                comment=col_meta.get('comment', '') or ""
            )
            for col_meta in columns_metadata
        ]

    def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
        """Return foreign keys of every table as document objects.

        A composite foreign key yields one document per
        (source column, target column) pair, all sharing the constraint name,
        so no column of the relationship is dropped.
        """
        inspector = inspect(self._ensure_engine())
        all_foreign_keys = []

        for table_name in inspector.get_table_names():
            for fk in inspector.get_foreign_keys(table_name):
                # The reflected name may be None for unnamed constraints.
                constraint_name = fk.get('name') or ''
                column_pairs = zip(fk['constrained_columns'], fk['referred_columns'])
                for source_column, target_column in column_pairs:
                    all_foreign_keys.append(ForeignKeyDocument(
                        source_table_name=table_name,
                        source_column_name=source_column,
                        target_table_name=fk['referred_table'],
                        target_column_name=target_column,
                        constraint_name=constraint_name
                    ))

        return all_foreign_keys

    def get_schemas_as_documents(self) -> List[SchemaDocument]:
        """Return a single placeholder schema document named ``"default"``."""
        # MariaDB doesn't have explicit schemas like PostgreSQL
        return [SchemaDocument(
            schema_name="default",
            comment="Default MariaDB schema"
        )]

    def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
        """Return indexes as document objects.

        Args:
            table_name: Restrict the result to this table; when omitted,
                every table reported by the inspector is scanned.

        Tables whose indexes cannot be reflected are skipped with a warning.
        """
        inspector = inspect(self._ensure_engine())
        indexes = []

        tables = [table_name] if table_name else inspector.get_table_names()

        for tbl_name in tables:
            try:
                for idx in inspector.get_indexes(tbl_name):
                    indexes.append(IndexDocument(
                        table_name=tbl_name,
                        index_name=idx['name'],
                        column_names=idx['column_names'],
                        is_unique=idx['unique'],
                        index_type="BTREE"  # Default for MariaDB
                    ))
            except SQLAlchemyError as e:
                logger.warning(f"Could not get indexes for table {tbl_name}: {e}")

        return indexes

    def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
        """Get unique values from the database.

        Placeholder: always returns an empty dict.
        """
        # This is a placeholder implementation.
        # A more sophisticated version like in ThothPgManager should be implemented.
        return {}

    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
        """Get example data (most frequent values) for each column in a table.

        Args:
            table_name: Table to sample.
            number_of_rows: Maximum number of distinct values per column.

        Returns:
            Mapping of column name to its most frequent non-NULL values,
            most frequent first. Shorter lists are padded with ``None`` so
            all lists have equal length. A column whose query fails maps to
            an empty (then padded) list. Returns ``{}`` when the table has no
            columns.

        Raises:
            SQLAlchemyError: If the table's columns cannot be inspected.
        """
        engine = self._ensure_engine()
        inspector = inspect(engine)
        try:
            columns = inspector.get_columns(table_name)
        except SQLAlchemyError as e:
            logger.error(f"Error inspecting columns for table {table_name}: {e}")
            raise

        if not columns:
            logger.warning(f"No columns found for table {table_name}")
            return {}

        most_frequent_values: Dict[str, List[Any]] = {}
        # MariaDB uses backticks for identifier quoting (same as MySQL);
        # embedded backticks are doubled so a name cannot break out of the quotes.
        quoted_table_name = self._quote_identifier(table_name)

        with engine.connect() as connection:
            for col_info in columns:
                column_name = col_info['name']
                quoted_column_name = self._quote_identifier(column_name)

                query_str = f"""
                    SELECT {quoted_column_name}
                    FROM (
                        SELECT {quoted_column_name}, COUNT(*) as _freq
                        FROM {quoted_table_name}
                        WHERE {quoted_column_name} IS NOT NULL
                        GROUP BY {quoted_column_name}
                        ORDER BY _freq DESC
                        LIMIT :num_rows
                    ) as subquery;
                """
                try:
                    result = connection.execute(text(query_str), {"num_rows": number_of_rows})
                    most_frequent_values[column_name] = [row[0] for row in result]
                except SQLAlchemyError as e:
                    logger.error(f"Error fetching frequent values for {column_name} in {table_name}: {e}")
                    most_frequent_values[column_name] = []

        # Normalize list lengths
        max_length = max((len(values) for values in most_frequent_values.values()), default=0)
        for values in most_frequent_values.values():
            shortfall = max_length - len(values)
            if shortfall > 0:
                values.extend([None] * shortfall)

        return most_frequent_values
258
+
259
+
260
@register_plugin("mariadb")
class MariaDBPlugin(DbPlugin):
    """MariaDB database plugin with full functionality.

    Holds the connection settings and the on-disk directory for one database,
    creates ``MariaDBAdapter`` instances, and exposes LSH helpers kept for
    backward compatibility.
    """

    plugin_name = "MariaDB Plugin"
    plugin_version = "1.0.0"
    supported_db_types = ["mariadb"]
    required_dependencies = ["mariadb", "SQLAlchemy"]

    # Cache of instances created by get_instance(), keyed by connection
    # parameters; _lock guards lookups and insertions.
    _instances = {}
    _lock = Lock()

    def __init__(self, db_root_path: str, db_mode: str = "dev", **kwargs):
        """Create an uninitialised plugin; call ``initialize`` to configure it."""
        super().__init__(db_root_path, db_mode, **kwargs)
        self.db_id = None
        self.db_directory_path = None
        self.host = None
        self.port = None
        self.dbname = None
        self.user = None
        self.password = None

        # LSH manager integration (for backward compatibility)
        self._lsh_manager = None

    @classmethod
    def get_instance(cls, host: str, port: int, dbname: str, user: str, password: str,
                     db_root_path: str, db_mode: str = "dev", **kwargs):
        """Get or create a singleton instance based on connection parameters.

        Instances are cached per
        ``(host, port, dbname, user, password, db_root_path, db_mode)``;
        extra keyword arguments are forwarded on first creation but are not
        part of the cache key.

        Raises:
            ValueError: If any of host, port, dbname, user, password or
                db_root_path is ``None``.
        """
        required_params = ['host', 'port', 'dbname', 'user', 'password', 'db_root_path']

        all_params = {
            'host': host,
            'port': port,
            'dbname': dbname,
            'user': user,
            'password': password,
            'db_root_path': db_root_path,
            'db_mode': db_mode,
            **kwargs
        }

        missing_params = [param for param in required_params if all_params.get(param) is None]
        if missing_params:
            raise ValueError(f"Missing required parameter{'s' if len(missing_params) > 1 else ''}: {', '.join(missing_params)}")

        with cls._lock:
            instance_key = (host, port, dbname, user, password, db_root_path, db_mode)

            if instance_key not in cls._instances:
                # all_params already carries db_root_path and db_mode; passing
                # them a second time as explicit keywords raises
                # "got multiple values for keyword argument".
                instance = cls(**all_params)
                instance.initialize(**all_params)
                cls._instances[instance_key] = instance

            return cls._instances[instance_key]

    def create_adapter(self, **kwargs) -> DbAdapter:
        """Create and return a MariaDB adapter built from ``kwargs``."""
        return MariaDBAdapter(kwargs)

    def validate_connection_params(self, **kwargs) -> bool:
        """Validate connection parameters for MariaDB.

        Requires a non-empty ``database`` or ``dbname``, the keys ``host``,
        ``port`` and ``password``, and either ``user`` or ``username`` (the
        alias accepted by ``initialize`` and by the adapter). ``port`` must
        be an integer between 1 and 65535; booleans are rejected.

        Returns:
            True when the parameters are acceptable, False otherwise (the
            reason is logged).
        """
        database = kwargs.get('database') or kwargs.get('dbname')
        if not database:
            logger.error("Either 'database' or 'dbname' is required for MariaDB")
            return False

        for param in ('host', 'port', 'user', 'password'):
            present = param in kwargs or (param == 'user' and 'username' in kwargs)
            if not present:
                logger.error(f"Missing required parameter: {param}")
                return False

        port = kwargs.get('port')
        # bool is a subclass of int, so exclude it explicitly.
        is_int_port = isinstance(port, int) and not isinstance(port, bool)
        if not is_int_port or not (1 <= port <= 65535):
            logger.error("port must be an integer between 1 and 65535")
            return False

        return True

    def initialize(self, **kwargs) -> None:
        """Initialize the MariaDB plugin.

        Copies the connection settings from ``kwargs`` onto the instance,
        stores every other keyword as an attribute of the same name,
        delegates to the base class ``initialize``, and finally derives the
        database id and directory path from the database name.
        """
        # Validate and extract parameters
        self.host = kwargs.get('host')
        self.port = kwargs.get('port', 3306)
        self.dbname = kwargs.get('database') or kwargs.get('dbname')
        self.user = kwargs.get('user') or kwargs.get('username')
        self.password = kwargs.get('password')

        # Set additional attributes
        connection_keys = ('host', 'port', 'database', 'dbname', 'user', 'username', 'password')
        for key, value in kwargs.items():
            if key not in connection_keys:
                setattr(self, key, value)

        # Initialize with updated kwargs
        super().initialize(**kwargs)

        # Set up database directory path and ID
        self.db_id = self.dbname
        self._setup_directory_path(self.db_id)

        logger.info(f"MariaDB plugin initialized for database: {self.db_id} at {self.host}:{self.port}")

    def _setup_directory_path(self, db_id: str) -> None:
        """Set ``db_directory_path`` to ``<db_root_path>/<db_mode>_databases/<db_id>``."""
        if isinstance(self.db_root_path, str):
            self.db_root_path = Path(self.db_root_path)

        self.db_directory_path = Path(self.db_root_path) / f"{self.db_mode}_databases" / db_id
        self.db_id = db_id

        # Reset LSH manager when directory path changes
        self._lsh_manager = None

    @property
    def lsh_manager(self):
        """Lazily created LSH manager, or ``None`` while no directory path is set."""
        if self._lsh_manager is None and self.db_directory_path:
            # Imported here so the LSH module is only loaded on first use.
            from ..lsh.manager import LshManager
            self._lsh_manager = LshManager(self.db_directory_path)
        return self._lsh_manager

    # LSH integration methods for backward compatibility
    def set_lsh(self) -> str:
        """Load the LSH; return ``"success"`` or ``"error"``.

        Any exception raised while loading is logged and reported as
        ``"error"``.
        """
        try:
            if self.lsh_manager and self.lsh_manager.load_lsh():
                return "success"
            return "error"
        except Exception as e:
            logger.error(f"Error loading LSH: {e}")
            return "error"

    def query_lsh(self, keyword: str, signature_size: int = 30, n_gram: int = 3, top_n: int = 10) -> Dict[str, Dict[str, List[str]]]:
        """Query the LSH manager for ``keyword``.

        Raises:
            Exception: If no LSH manager is available, or if the query
                fails (the underlying error is attached as the cause).
        """
        if not self.lsh_manager:
            raise Exception(f"LSH not available for {self.db_id}")

        try:
            return self.lsh_manager.query(
                keyword=keyword,
                signature_size=signature_size,
                n_gram=n_gram,
                top_n=top_n
            )
        except Exception as e:
            logger.error(f"LSH query failed: {e}")
            raise Exception(f"Error querying LSH for {self.db_id}: {e}") from e

    def get_connection_info(self) -> Dict[str, Any]:
        """Get connection information.

        Merges the base plugin info, the adapter's connection info (when an
        adapter exists) and this plugin's own settings. The password is not
        included.
        """
        base_info = super().get_plugin_info()

        if self.adapter:
            adapter_info = self.adapter.get_connection_info()
            base_info.update(adapter_info)

        base_info.update({
            "db_id": self.db_id,
            "host": self.host,
            "port": self.port,
            "database": self.dbname,
            "user": self.user,
            "db_directory_path": str(self.db_directory_path) if self.db_directory_path else None,
            # Reading lsh_manager creates the manager when a directory path is set.
            "lsh_available": self.lsh_manager is not None
        })

        return base_info

    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
        """Get example data through the adapter.

        Raises:
            RuntimeError: If the plugin has no adapter.
        """
        if not self.adapter:
            raise RuntimeError("Plugin not initialized")
        return self.adapter.get_example_data(table_name, number_of_rows)