thoth-dbmanager 0.5.2__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thoth_dbmanager/__init__.py +1 -1
- thoth_dbmanager/adapters/__init__.py +30 -6
- thoth_dbmanager/adapters/mariadb.py +352 -129
- thoth_dbmanager/adapters/sqlserver.py +17 -6
- thoth_dbmanager/core/factory.py +3 -0
- thoth_dbmanager/core/interfaces.py +1 -82
- thoth_dbmanager/plugins/__init__.py +33 -8
- thoth_dbmanager/plugins/mariadb.py +31 -251
- {thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/METADATA +2 -7
- {thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/RECORD +14 -13
- thoth_dbmanager-0.5.8.dist-info/licenses/LICENSE.md +21 -0
- {thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/WHEEL +0 -0
- {thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/licenses/LICENSE +0 -0
- {thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/top_level.txt +0 -0
thoth_dbmanager/adapters/__init__.py
CHANGED
@@ -2,14 +2,38 @@
 Database adapters for Thoth SQL Database Manager.
 """
 
-from .postgresql import PostgreSQLAdapter
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Always available adapter (SQLite is built into Python)
 from .sqlite import SQLiteAdapter
-from .mariadb import MariaDBAdapter
-from .sqlserver import SQLServerAdapter
 
 __all__ = [
-    "PostgreSQLAdapter",
     "SQLiteAdapter",
-    "MariaDBAdapter",
-    "SQLServerAdapter",
 ]
+
+# Optional adapters - only import if dependencies are available
+try:
+    import psycopg2
+    from .postgresql import PostgreSQLAdapter
+    __all__.append("PostgreSQLAdapter")
+except ImportError:
+    logger.debug("psycopg2 not installed, PostgreSQLAdapter not available")
+    PostgreSQLAdapter = None
+
+try:
+    import mariadb
+    from .mariadb import MariaDBAdapter
+    __all__.append("MariaDBAdapter")
+except ImportError:
+    logger.debug("MariaDB connector not installed, MariaDBAdapter not available")
+    MariaDBAdapter = None
+
+try:
+    import pyodbc
+    from .sqlserver import SQLServerAdapter
+    __all__.append("SQLServerAdapter")
+except ImportError:
+    logger.debug("pyodbc not installed, SQLServerAdapter not available")
+    SQLServerAdapter = None
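
With this change the optional adapters are exported as None when their driver package is missing, instead of failing at import time. A minimal sketch of how calling code might guard on that (connection values are illustrative placeholders, not taken from the package):

    from thoth_dbmanager.adapters import MariaDBAdapter

    if MariaDBAdapter is None:
        # Driver missing: install the 'mariadb' extra, e.g. pip install "thoth-dbmanager[mariadb]"
        raise SystemExit("MariaDB support is not installed")

    adapter = MariaDBAdapter({
        "host": "localhost", "port": 3307,          # placeholder values
        "database": "demo", "user": "demo", "password": "secret",
    })
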
thoth_dbmanager/adapters/mariadb.py
CHANGED
@@ -1,165 +1,388 @@
+# Copyright (c) 2025 Marco Pancotti
+# This file is part of Thoth and is released under the MIT License.
+# See the LICENSE.md file in the project root for full license information.
+
 """
-MariaDB adapter
+MariaDB adapter implementation.
 """
-
-from typing import Any, Dict, List, Optional
-
-from sqlalchemy
+import logging
+from typing import Any, Dict, List, Optional, Union
+import mariadb
+from sqlalchemy import create_engine, text, inspect
 from sqlalchemy.exc import SQLAlchemyError
 
 from ..core.interfaces import DbAdapter
+from ..documents import (
+    TableDocument,
+    ColumnDocument,
+    SchemaDocument,
+    ForeignKeyDocument,
+    IndexDocument
+)
+
+logger = logging.getLogger(__name__)
 
 
 class MariaDBAdapter(DbAdapter):
+    """
+    MariaDB database adapter implementation.
+    """
 
-    def __init__(self,
-        Initialize MariaDB adapter.
-
-        Args:
-            connection_string: MariaDB connection string
-            **kwargs: Additional connection parameters
-        """
-        self.connection_string = connection_string
+    def __init__(self, connection_params: Dict[str, Any]):
+        super().__init__(connection_params)
         self.engine = None
+        self.raw_connection = None
+        self.host = connection_params.get('host', 'localhost')
+        self.port = connection_params.get('port', 3307)
+        self.database = connection_params.get('database')
+        self.user = connection_params.get('user')
+        self.password = connection_params.get('password')
 
     def connect(self) -> None:
-        """Establish
+        """Establish MariaDB connection"""
         try:
+            # Create SQLAlchemy engine
+            connection_string = self._build_connection_string()
+            self.engine = create_engine(connection_string, echo=False)
+
+            # Test connection
+            with self.engine.connect() as conn:
+                conn.execute(text("SELECT 1"))
+
+            # Also create raw mariadb connection for specific operations
+            self.raw_connection = mariadb.connect(
+                host=self.host,
+                port=self.port,
+                database=self.database,
+                user=self.user,
+                password=self.password
             )
+
+            self._initialized = True
+            logger.info("MariaDB connection established successfully")
+
         except Exception as e:
+            logger.error(f"Failed to connect to MariaDB: {e}")
+            raise
 
     def disconnect(self) -> None:
-        """Close
+        """Close MariaDB connection"""
+        try:
+            if self.engine:
+                self.engine.dispose()
+                self.engine = None
+
+            if self.raw_connection:
+                self.raw_connection.close()
+                self.raw_connection = None
+
+            self._initialized = False
+            logger.info("MariaDB connection closed")
+
+        except Exception as e:
+            logger.error(f"Error closing MariaDB connection: {e}")
+
+    def _build_connection_string(self) -> str:
+        """Build SQLAlchemy connection string for MariaDB"""
+        if not all([self.database, self.user, self.password]):
+            raise ValueError("Missing required connection parameters: database, user, password")
+
+        # MariaDB uses mysql+pymysql or mariadb+mariadbconnector dialect
+        return f"mariadb+mariadbconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
 
-    def execute_query(self, query: str, params: Optional[Dict[str,
-        """Execute
+    def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
+        """Execute SQL query"""
         if not self.engine:
+            raise RuntimeError("Not connected to database")
+
         try:
             with self.engine.connect() as conn:
+                # MariaDB doesn't have direct query timeout in the same way
+                # but we can set connection timeout
+                conn.execute(text(f"SET SESSION max_statement_time = {timeout}"))
+
+                # Execute query
+                if params:
+                    result = conn.execute(text(query), params)
+                else:
+                    result = conn.execute(text(query))
+
+                # Handle different fetch modes
+                if query.strip().upper().startswith(('SELECT', 'WITH', 'SHOW', 'DESCRIBE')):
+                    if fetch == "all":
+                        return [dict(row._mapping) for row in result]
+                    elif fetch == "one":
+                        row = result.first()
+                        return dict(row._mapping) if row else None
+                    elif isinstance(fetch, int):
+                        rows = result.fetchmany(fetch)
+                        return [dict(row._mapping) for row in rows]
+                else:
+                    # For INSERT, UPDATE, DELETE
+                    conn.commit()
+                    return result.rowcount
+
         except SQLAlchemyError as e:
+            logger.error(f"MariaDB query execution failed: {e}")
+            raise
 
+    def get_tables_as_documents(self) -> List[TableDocument]:
+        """Return tables as document objects"""
         if not self.engine:
+            raise RuntimeError("Not connected to database")
+
         try:
+            inspector = inspect(self.engine)
+            tables = []
+
+            for table_name in inspector.get_table_names():
+                # Get row count
+                count_result = self.execute_query(f"SELECT COUNT(*) as count FROM {table_name}", fetch="one")
+                row_count = count_result.get('count', 0) if count_result else 0
+
+                # Get column count
+                columns = inspector.get_columns(table_name)
+
+                # Get table comment (if available)
+                table_comment = ""
+                try:
+                    comment_result = self.execute_query(
+                        f"SELECT table_comment FROM information_schema.tables WHERE table_name = '{table_name}'",
+                        fetch="one"
+                    )
+                    table_comment = comment_result.get('table_comment', '') if comment_result else ''
+                except:
+                    pass
+
+                tables.append(TableDocument(
+                    table_name=table_name,
+                    table_type="TABLE",
+                    row_count=row_count,
+                    column_count=len(columns),
+                    description=table_comment
+                ))
+
+            return tables
+
+        except Exception as e:
+            logger.error(f"Error getting tables as documents: {e}")
+            raise
 
-        schema = {
-            'table_name': table_name,
-            'columns': []
-        }
+    def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
+        """Return columns as document objects"""
+        if not self.engine:
+            raise RuntimeError("Not connected to database")
 
+        try:
+            inspector = inspect(self.engine)
+            columns = []
+
+            for col in inspector.get_columns(table_name):
+                columns.append(ColumnDocument(
+                    table_name=table_name,
+                    column_name=col['name'],
+                    data_type=str(col['type']),
+                    is_nullable=col.get('nullable', True),
+                    column_default=col.get('default'),
+                    is_pk=col.get('primary_key', False),
+                    column_comment=col.get('comment', '')
+                ))
+
+            # Mark primary keys
+            pk_constraint = inspector.get_pk_constraint(table_name)
+            if pk_constraint and pk_constraint.get('constrained_columns'):
+                pk_columns = pk_constraint['constrained_columns']
+                for col in columns:
+                    if col.column_name in pk_columns:
+                        col.is_pk = True
+
+            return columns
+
+        except Exception as e:
+            logger.error(f"Error getting columns as documents: {e}")
+            raise
+
+    def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
+        """Return foreign keys as document objects"""
+        if not self.engine:
+            raise RuntimeError("Not connected to database")
 
+        try:
+            inspector = inspect(self.engine)
+            foreign_keys = []
+
+            for table_name in inspector.get_table_names():
+                for fk in inspector.get_foreign_keys(table_name):
+                    # Each foreign key can have multiple column pairs
+                    for i, const_col in enumerate(fk['constrained_columns']):
+                        foreign_keys.append(ForeignKeyDocument(
+                            constraint_name=fk['name'],
+                            table_name=table_name,
+                            column_name=const_col,
+                            foreign_table_name=fk['referred_table'],
+                            foreign_column_name=fk['referred_columns'][i] if i < len(fk['referred_columns']) else None
+                        ))
+
+            return foreign_keys
+
+        except Exception as e:
+            logger.error(f"Error getting foreign keys as documents: {e}")
+            raise
 
+    def get_schemas_as_documents(self) -> List[SchemaDocument]:
+        """Return schemas as document objects"""
+        # MariaDB uses database as schema concept
+        if not self.engine:
+            raise RuntimeError("Not connected to database")
 
+        try:
+            # Get current database as schema
+            result = self.execute_query("SELECT DATABASE() as db_name", fetch="one")
+            current_db = result.get('db_name') if result else self.database
+
+            # Get table count for current database
+            tables = self.get_tables_as_documents()
+
+            return [SchemaDocument(
+                catalog_name=current_db,
+                schema_name=current_db,
+                schema_owner=self.user,
+                table_count=len(tables)
+            )]
+
+        except Exception as e:
+            logger.error(f"Error getting schemas as documents: {e}")
+            raise
+
+    def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
+        """Return indexes as document objects"""
+        if not self.engine:
+            raise RuntimeError("Not connected to database")
 
+        try:
+            inspector = inspect(self.engine)
+            indexes = []
+
+            # Get tables to process
+            tables = [table_name] if table_name else inspector.get_table_names()
+
+            for tbl in tables:
+                for idx in inspector.get_indexes(tbl):
+                    indexes.append(IndexDocument(
+                        table_name=tbl,
+                        index_name=idx['name'],
+                        column_names=idx['column_names'],
+                        is_unique=idx.get('unique', False),
+                        index_type='BTREE'  # MariaDB default
+                    ))
+
+            return indexes
+
+        except Exception as e:
+            logger.error(f"Error getting indexes as documents: {e}")
+            raise
 
-        """Get foreign key information for a table."""
-        query = f"""
-            SELECT
-                CONSTRAINT_NAME as name,
-                COLUMN_NAME as column_name,
-                REFERENCED_TABLE_NAME as referenced_table,
-                REFERENCED_COLUMN_NAME as referenced_column
-            FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
-            WHERE TABLE_NAME = '{table_name}'
-            AND REFERENCED_TABLE_NAME IS NOT NULL
+    def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
         """
+        Get unique values from the database.
 
-            col_def += " NOT NULL"
-            if col.get('default') is not None:
-                col_def += f" DEFAULT {col['default']}"
-            if col.get('primary_key'):
-                col_def += " PRIMARY KEY"
-            columns.append(col_def)
+        Returns:
+            Dict[str, Dict[str, List[str]]]: Dictionary where:
+            - outer key is table name
+            - inner key is column name
+            - value is list of unique values
+        """
+        if not self.engine:
+            raise RuntimeError("Not connected to database")
 
+        try:
+            inspector = inspect(self.engine)
+            unique_values = {}
+
+            for table_name in inspector.get_table_names():
+                unique_values[table_name] = {}
+
+                for col in inspector.get_columns(table_name):
+                    col_name = col['name']
+                    # Only get unique values for reasonable data types
+                    col_type = str(col['type']).upper()
+
+                    if any(t in col_type for t in ['VARCHAR', 'CHAR', 'TEXT', 'INT', 'ENUM']):
+                        try:
+                            # Limit to first 100 unique values
+                            query = f"SELECT DISTINCT `{col_name}` FROM `{table_name}` LIMIT 100"
+                            result = self.execute_query(query)
+
+                            values = []
+                            for row in result:
+                                val = row.get(col_name)
+                                if val is not None:
+                                    values.append(str(val))
+
+                            if values:
+                                unique_values[table_name][col_name] = values
+
+                        except Exception as e:
+                            logger.debug(f"Could not get unique values for {table_name}.{col_name}: {e}")
+                            continue
+
+            return unique_values
+
+        except Exception as e:
+            logger.error(f"Error getting unique values: {e}")
+            raise
 
-        """Check if a table exists."""
-        query = f"""
-            SELECT COUNT(*) as count
-            FROM INFORMATION_SCHEMA.TABLES
-            WHERE TABLE_NAME = '{table_name}'
+    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
         """
+        Get example data (most frequent values) for each column in a table.
+
+        Args:
+            table_name (str): The name of the table.
+            number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.
+
+        Returns:
+            Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
+        """
+        if not self.engine:
+            raise RuntimeError("Not connected to database")
+
+        try:
+            inspector = inspect(self.engine)
+            columns = inspector.get_columns(table_name)
+
+            example_data = {}
+
+            for col in columns:
+                col_name = col['name']
+                col_type = str(col['type']).upper()
+
+                # Skip blob/binary columns
+                if any(t in col_type for t in ['BLOB', 'BINARY', 'IMAGE']):
+                    example_data[col_name] = []
+                    continue
+
+                try:
+                    # Get most frequent values
+                    query = f"""
+                        SELECT `{col_name}`, COUNT(*) as freq
+                        FROM `{table_name}`
+                        WHERE `{col_name}` IS NOT NULL
+                        GROUP BY `{col_name}`
+                        ORDER BY freq DESC
+                        LIMIT {number_of_rows}
+                    """
+
+                    result = self.execute_query(query)
+                    values = [row[col_name] for row in result]
+
+                    example_data[col_name] = values
+
+                except Exception as e:
+                    logger.debug(f"Could not get example data for {table_name}.{col_name}: {e}")
+                    example_data[col_name] = []
+
+            return example_data
+
+        except Exception as e:
+            logger.error(f"Error getting example data: {e}")
+            raise
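
The rewritten adapter builds a SQLAlchemy engine from a mariadb+mariadbconnector URL, keeps a raw mariadb connection next to it, and exposes document-oriented metadata accessors. A rough usage sketch against the signatures shown above (values and table name are placeholders):

    adapter = MariaDBAdapter({"host": "localhost", "port": 3307,
                              "database": "demo", "user": "demo", "password": "secret"})
    adapter.connect()                                     # SQLAlchemy engine + raw mariadb.connect()
    one = adapter.execute_query("SELECT 1 AS ok", fetch="one")
    tables = adapter.get_tables_as_documents()            # List[TableDocument]
    cols = adapter.get_columns_as_documents("customers")  # "customers" is a hypothetical table
    adapter.disconnect()
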
thoth_dbmanager/adapters/sqlserver.py
CHANGED
@@ -23,6 +23,7 @@ class SQLServerAdapter(DbAdapter):
         self.database = connection_params.get('database')
         self.user = connection_params.get('user')
         self.password = connection_params.get('password')
+        self.schema = connection_params.get('schema', 'dbo')  # Default to 'dbo' for SQL Server
         self.driver = connection_params.get('driver', 'ODBC Driver 17 for SQL Server')
 
     def connect(self) -> None:
@@ -142,10 +143,11 @@ class SQLServerAdapter(DbAdapter):
 
     def get_tables(self) -> List[str]:
         """Get list of tables in the database."""
-        query = """
+        query = f"""
             SELECT TABLE_NAME as name
             FROM INFORMATION_SCHEMA.TABLES
             WHERE TABLE_TYPE = 'BASE TABLE'
+            AND TABLE_SCHEMA = '{self.schema}'
             ORDER BY TABLE_NAME
         """
         result = self.execute_query(query)
@@ -159,15 +161,17 @@ class SQLServerAdapter(DbAdapter):
             DATA_TYPE as type,
             IS_NULLABLE as nullable,
             COLUMN_DEFAULT as default_value,
-            CASE WHEN COLUMNPROPERTY(OBJECT_ID(TABLE_NAME), COLUMN_NAME, 'IsIdentity') = 1 THEN 1 ELSE 0 END as is_identity,
+            CASE WHEN COLUMNPROPERTY(OBJECT_ID('{self.schema}.' + TABLE_NAME), COLUMN_NAME, 'IsIdentity') = 1 THEN 1 ELSE 0 END as is_identity,
             CASE WHEN EXISTS (
                 SELECT 1 FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
                 WHERE TABLE_NAME = '{table_name}'
+                AND TABLE_SCHEMA = '{self.schema}'
                 AND COLUMN_NAME = c.COLUMN_NAME
                 AND CONSTRAINT_NAME LIKE 'PK_%'
             ) THEN 1 ELSE 0 END as is_primary_key
             FROM INFORMATION_SCHEMA.COLUMNS c
             WHERE TABLE_NAME = '{table_name}'
+            AND TABLE_SCHEMA = '{self.schema}'
             ORDER BY ORDINAL_POSITION
         """
 
@@ -255,6 +259,7 @@ class SQLServerAdapter(DbAdapter):
             SELECT COUNT(*) as count
             FROM INFORMATION_SCHEMA.TABLES
             WHERE TABLE_NAME = '{table_name}'
+            AND TABLE_SCHEMA = '{self.schema}'
             AND TABLE_TYPE = 'BASE TABLE'
         """
         result = self.execute_query(query)
@@ -276,13 +281,14 @@ class SQLServerAdapter(DbAdapter):
         if not self.engine:
             raise RuntimeError("Not connected to database")
 
-        query = """
+        query = f"""
             SELECT
                 TABLE_NAME as name,
                 TABLE_SCHEMA as schema_name,
                 '' as comment
             FROM INFORMATION_SCHEMA.TABLES
             WHERE TABLE_TYPE = 'BASE TABLE'
+            AND TABLE_SCHEMA = '{self.schema}'
             ORDER BY TABLE_NAME
         """
 
@@ -323,9 +329,10 @@ class SQLServerAdapter(DbAdapter):
         """Get example data (most frequent values) for each column in a table."""
         inspector = inspect(self.engine)
         try:
+            # For SQL Server, we need to specify the schema when inspecting columns
+            columns = inspector.get_columns(table_name, schema=self.schema)
         except SQLAlchemyError as e:
-            logger.error(f"Error inspecting columns for table {table_name}: {e}")
+            logger.error(f"Error inspecting columns for table {table_name} in schema {self.schema}: {e}")
             raise e
 
         if not columns:
@@ -374,16 +381,18 @@ class SQLServerAdapter(DbAdapter):
             CASE WHEN EXISTS (
                 SELECT 1 FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
                 WHERE TABLE_NAME = c.TABLE_NAME
+                AND TABLE_SCHEMA = c.TABLE_SCHEMA
                 AND COLUMN_NAME = c.COLUMN_NAME
                 AND CONSTRAINT_NAME LIKE 'PK_%'
             ) THEN 1 ELSE 0 END as is_primary_key
             FROM INFORMATION_SCHEMA.COLUMNS c
             WHERE c.TABLE_NAME = '{table_name}'
+            AND c.TABLE_SCHEMA = '{self.schema}'
             ORDER BY c.ORDINAL_POSITION
         """
         else:
             # Get all columns
-            query = """
+            query = f"""
             SELECT
                 c.TABLE_NAME as table_name,
                 c.COLUMN_NAME as column_name,
@@ -394,10 +403,12 @@ class SQLServerAdapter(DbAdapter):
             CASE WHEN EXISTS (
                 SELECT 1 FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
                 WHERE TABLE_NAME = c.TABLE_NAME
+                AND TABLE_SCHEMA = c.TABLE_SCHEMA
                 AND COLUMN_NAME = c.COLUMN_NAME
                 AND CONSTRAINT_NAME LIKE 'PK_%'
             ) THEN 1 ELSE 0 END as is_primary_key
             FROM INFORMATION_SCHEMA.COLUMNS c
+            WHERE c.TABLE_SCHEMA = '{self.schema}'
             ORDER BY c.TABLE_NAME, c.ORDINAL_POSITION
         """
 
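
The new schema connection parameter (default 'dbo') is now applied to every INFORMATION_SCHEMA query above. A hedged sketch of passing it explicitly; only the database/user/password/schema/driver keys appear in this diff, and all values are placeholders:

    params = {
        "database": "demo", "user": "sa", "password": "secret",   # placeholders
        "schema": "sales",                   # new in 0.5.8, falls back to 'dbo'
        "driver": "ODBC Driver 17 for SQL Server",
    }
    adapter = SQLServerAdapter(params)       # remaining connection keys unchanged from 0.5.2
    adapter.connect()
    print(adapter.get_tables())              # now restricted to the 'sales' schema
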
thoth_dbmanager/core/factory.py
CHANGED
@@ -6,6 +6,9 @@ from typing import Any, Dict, List, Optional
 from .registry import DbPluginRegistry
 from .interfaces import DbPlugin
 
+# Import plugins to ensure they are registered
+from .. import plugins  # This imports all plugins and registers them
+
 logger = logging.getLogger(__name__)
 
 
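
Because core/factory.py now imports the plugins package at module load, importing the factory is enough to register every plugin whose driver is installed. A minimal illustration (the import path follows the package layout listed in RECORD):

    # Side effect: thoth_dbmanager.plugins is imported, so each available
    # plugin's @register_plugin(...) decorator has already run.
    from thoth_dbmanager.core import factory  # noqa: F401
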
thoth_dbmanager/core/interfaces.py
CHANGED
@@ -278,85 +278,4 @@ class DbPlugin(ABC):
         """Get unique values (backward compatibility)"""
         if not self.adapter:
             raise RuntimeError("Plugin not initialized")
-        return self.adapter.get_unique_values()
-
-    def get_embedding_function(self):
-        """
-        Get the embedding function for similarity computations.
-
-        Returns:
-            SafeSentenceTransformer: An embedding function with encode method
-        """
-        try:
-            # Import SafeSentenceTransformer
-            try:
-                from sentence_transformers import SentenceTransformer
-                import logging
-
-                logger = logging.getLogger(__name__)
-
-                class SafeSentenceTransformer:
-                    """
-                    Wrapper for SentenceTransformer that handles PyTorch meta tensor issues.
-                    """
-                    def __init__(self, model_name_or_path: str):
-                        self.model_name_or_path = model_name_or_path
-                        self._model = None
-
-                    def _get_model(self):
-                        """Lazy initialization of the SentenceTransformer model."""
-                        if self._model is None:
-                            try:
-                                logger.info(f"Initializing SentenceTransformer with model: {self.model_name_or_path}")
-                                self._model = SentenceTransformer(
-                                    model_name_or_path=self.model_name_or_path,
-                                    device='cpu'  # Explicitly set device to CPU to avoid meta tensor issues
-                                )
-                                logger.info("SentenceTransformer initialized successfully")
-                            except Exception as e:
-                                logger.error(f"Failed to initialize SentenceTransformer: {e}")
-                                # Try alternative initialization approach
-                                try:
-                                    logger.info("Trying alternative initialization approach...")
-                                    self._model = SentenceTransformer(self.model_name_or_path)
-                                    # Move to CPU explicitly after initialization
-                                    self._model = self._model.to('cpu')
-                                    logger.info("Alternative initialization successful")
-                                except Exception as e2:
-                                    logger.error(f"Alternative initialization also failed: {e2}")
-                                    raise e2
-                        return self._model
-
-                    def encode(self, sentences, **kwargs):
-                        """Encode sentences using the underlying SentenceTransformer model."""
-                        model = self._get_model()
-                        return model.encode(sentences, **kwargs)
-
-                return SafeSentenceTransformer(
-                    model_name_or_path="paraphrase-multilingual-MiniLM-L12-v2"
-                )
-
-            except ImportError:
-                import logging
-                logger = logging.getLogger(__name__)
-                logger.warning("sentence_transformers not available, creating dummy embedding function")
-                # Create a dummy embedding function for testing
-                class DummyEmbeddingFunction:
-                    def encode(self, sentences, **kwargs):
-                        import numpy as np
-                        # Return dummy embeddings - same shape for all sentences
-                        return np.random.rand(len(sentences), 384)  # 384 is typical embedding size
-
-                return DummyEmbeddingFunction()
-
-        except Exception as e:
-            import logging
-            logger = logging.getLogger(__name__)
-            logger.error(f"Failed to create embedding function: {e}")
-            # Return a basic dummy function as fallback
-            class BasicDummyEmbeddingFunction:
-                def encode(self, sentences, **kwargs):
-                    import numpy as np
-                    return np.random.rand(len(sentences), 384)
-
-            return BasicDummyEmbeddingFunction()
+        return self.adapter.get_unique_values()
thoth_dbmanager/plugins/__init__.py
CHANGED
@@ -2,16 +2,41 @@
 Database plugins for Thoth SQL Database Manager.
 """
 
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Always available plugin (SQLite is built into Python)
 from .sqlite import SQLitePlugin
-from .mariadb import MariaDBPlugin
-from .sqlserver import SQLServerPlugin
 
-# This ensures all plugins are registered when the module is imported
 __all__ = [
-    "PostgreSQLPlugin",
     "SQLitePlugin",
-    "MariaDBPlugin",
-    "SQLServerPlugin",
 ]
+
+# Optional plugins - only import if dependencies are available
+try:
+    import psycopg2
+    from .postgresql import PostgreSQLPlugin
+    __all__.append("PostgreSQLPlugin")
+    logger.debug("PostgreSQL plugin loaded successfully")
+except ImportError:
+    logger.debug("psycopg2 not installed, PostgreSQL plugin not available")
+    PostgreSQLPlugin = None
+
+try:
+    import mariadb
+    from .mariadb import MariaDBPlugin
+    __all__.append("MariaDBPlugin")
+    logger.debug("MariaDB plugin loaded successfully")
+except ImportError:
+    logger.debug("MariaDB connector not installed, MariaDB plugin not available")
+    MariaDBPlugin = None
+
+try:
+    import pyodbc
+    from .sqlserver import SQLServerPlugin
+    __all__.append("SQLServerPlugin")
+    logger.debug("SQL Server plugin loaded successfully")
+except ImportError:
+    logger.debug("pyodbc not installed, SQL Server plugin not available")
+    SQLServerPlugin = None
thoth_dbmanager/plugins/mariadb.py
CHANGED
@@ -1,265 +1,27 @@
+# Copyright (c) 2025 Marco Pancotti
+# This file is part of Thoth and is released under the MIT License.
+# See the LICENSE.md file in the project root for full license information.
+
 """
 MariaDB plugin for Thoth SQL Database Manager.
+Uses the MariaDB adapter from adapters.mariadb module.
 """
 
 import logging
-import os
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, List, Optional
-
-from sqlalchemy import create_engine, inspect, text
-from sqlalchemy.exc import SQLAlchemyError
+from typing import Any, Dict, List, Optional
 
 from ..core.interfaces import DbPlugin, DbAdapter
 from ..core.registry import register_plugin
-from ..
+from ..adapters.mariadb import MariaDBAdapter
 
 logger = logging.getLogger(__name__)
 
 
-class MariaDBAdapter(DbAdapter):
-    """MariaDB database adapter with full functionality."""
-
-    def __init__(self, connection_params: Dict[str, Any]):
-        super().__init__(connection_params)
-        self.engine = None
-        self.host = connection_params.get('host')
-        self.port = connection_params.get('port', 3306)
-        self.dbname = connection_params.get('database') or connection_params.get('dbname')
-        self.user = connection_params.get('user') or connection_params.get('username')
-        self.password = connection_params.get('password')
-
-    def connect(self) -> None:
-        """Establish database connection."""
-        try:
-            # Try different connection methods for MariaDB
-            connection_methods = [
-                # Use MySQL driver (MariaDB is MySQL-compatible)
-                f"mysql+pymysql://{self.user}:{self.password}@{self.host}:{self.port}/{self.dbname}",
-                # Use MySQL connector with explicit TCP
-                f"mysql+mysqlconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.dbname}",
-                # Use MariaDB connector with TCP parameters
-                f"mariadb+mariadbconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.dbname}?unix_socket=",
-            ]
-
-            last_error = None
-            for connection_string in connection_methods:
-                try:
-                    self.engine = create_engine(connection_string, pool_pre_ping=True)
-                    # Test the connection
-                    with self.engine.connect() as conn:
-                        conn.execute(text("SELECT 1"))
-                    self.connection = self.engine
-                    self._initialized = True
-                    logger.info(f"MariaDB connected using: {connection_string.split('://')[0]}")
-                    return
-                except Exception as e:
-                    last_error = e
-                    logger.debug(f"MariaDB connection failed with {connection_string.split('://')[0]}: {e}")
-                    if self.engine:
-                        self.engine.dispose()
-                        self.engine = None
-                    continue
-
-            # If all methods fail, raise the last error
-            raise ConnectionError(f"Failed to connect to MariaDB: {last_error}")
-
-        except Exception as e:
-            raise ConnectionError(f"Failed to connect to MariaDB: {e}")
-
-    def disconnect(self) -> None:
-        """Close database connection."""
-        if self.engine:
-            self.engine.dispose()
-            self.engine = None
-            self.connection = None
-
-    def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
-        """Execute a query and return results."""
-        if not self.engine:
-            self.connect()
-
-        with self.engine.connect() as connection:
-            try:
-                if params:
-                    result = connection.execute(text(query), params)
-                else:
-                    result = connection.execute(text(query))
-
-                # Check if this is a query that returns rows (SELECT, SHOW, etc.)
-                query_upper = query.strip().upper()
-                if query_upper.startswith(('SELECT', 'SHOW', 'DESCRIBE', 'DESC', 'EXPLAIN', 'WITH')):
-                    if fetch == "all":
-                        return [row._asdict() for row in result.fetchall()]
-                    elif fetch == "one":
-                        row = result.fetchone()
-                        return row._asdict() if row else None
-                    elif isinstance(fetch, int) and fetch > 0:
-                        return [row._asdict() for row in result.fetchmany(fetch)]
-                    else:
-                        return [row._asdict() for row in result.fetchall()]
-                else:
-                    # For DDL/DML queries (CREATE, INSERT, UPDATE, DELETE), return rowcount
-                    connection.commit()
-                    return result.rowcount
-            except SQLAlchemyError as e:
-                logger.error(f"Error executing SQL: {str(e)}")
-                raise e
-
-    def get_tables_as_documents(self) -> List[TableDocument]:
-        """Return tables as document objects."""
-        inspector = inspect(self.engine)
-        table_names = inspector.get_table_names()
-        tables = []
-
-        for table_name in table_names:
-            try:
-                table_comment = inspector.get_table_comment(table_name).get('text', '')
-            except SQLAlchemyError:
-                table_comment = ''
-
-            tables.append(TableDocument(
-                table_name=table_name,
-                schema_name="",  # MariaDB doesn't have explicit schemas like PostgreSQL
-                comment=table_comment or "",
-                row_count=None  # Could be populated if needed
-            ))
-
-        return tables
-
-    def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
-        """Return columns as document objects."""
-        inspector = inspect(self.engine)
-        columns_metadata = inspector.get_columns(table_name)
-        pk_columns = inspector.get_pk_constraint(table_name).get('constrained_columns', [])
-
-        columns = []
-        for col_meta in columns_metadata:
-            columns.append(ColumnDocument(
-                table_name=table_name,
-                column_name=col_meta['name'],
-                data_type=str(col_meta['type']),
-                is_nullable=col_meta.get('nullable', True),
-                is_pk=col_meta['name'] in pk_columns,
-                comment=col_meta.get('comment', '') or ""
-            ))
-
-        return columns
-
-    def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
-        """Return foreign keys as document objects."""
-        inspector = inspect(self.engine)
-        all_foreign_keys = []
-
-        for table_name in inspector.get_table_names():
-            fks = inspector.get_foreign_keys(table_name)
-            for fk in fks:
-                all_foreign_keys.append(ForeignKeyDocument(
-                    source_table_name=table_name,
-                    source_column_name=fk['constrained_columns'][0],
-                    target_table_name=fk['referred_table'],
-                    target_column_name=fk['referred_columns'][0],
-                    constraint_name=fk.get('name', '')
-                ))
-
-        return all_foreign_keys
-
-    def get_schemas_as_documents(self) -> List[SchemaDocument]:
-        """Return schemas as document objects."""
-        # MariaDB doesn't have explicit schemas like PostgreSQL
-        return [SchemaDocument(
-            schema_name="default",
-            comment="Default MariaDB schema"
-        )]
-
-    def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
-        """Return indexes as document objects."""
-        inspector = inspect(self.engine)
-        indexes = []
-
-        tables = [table_name] if table_name else inspector.get_table_names()
-
-        for tbl_name in tables:
-            try:
-                table_indexes = inspector.get_indexes(tbl_name)
-                for idx in table_indexes:
-                    indexes.append(IndexDocument(
-                        table_name=tbl_name,
-                        index_name=idx['name'],
-                        column_names=idx['column_names'],
-                        is_unique=idx['unique'],
-                        index_type="BTREE"  # Default for MariaDB
-                    ))
-            except SQLAlchemyError as e:
-                logger.warning(f"Could not get indexes for table {tbl_name}: {e}")
-
-        return indexes
-
-    def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
-        """Get unique values from the database."""
-        # This is a placeholder implementation.
-        # A more sophisticated version like in ThothPgManager should be implemented.
-        return {}
-
-    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
-        """Get example data (most frequent values) for each column in a table."""
-        inspector = inspect(self.engine)
-        try:
-            columns = inspector.get_columns(table_name)
-        except SQLAlchemyError as e:
-            logger.error(f"Error inspecting columns for table {table_name}: {e}")
-            raise e
-
-        if not columns:
-            logger.warning(f"No columns found for table {table_name}")
-            return {}
-
-        most_frequent_values: Dict[str, List[Any]] = {}
-
-        with self.engine.connect() as connection:
-            for col_info in columns:
-                column_name = col_info['name']
-                # MariaDB uses backticks for identifier quoting (same as MySQL)
-                quoted_column_name = f'`{column_name}`'
-                quoted_table_name = f'`{table_name}`'
-
-                query_str = f"""
-                    SELECT {quoted_column_name}
-                    FROM (
-                        SELECT {quoted_column_name}, COUNT(*) as _freq
-                        FROM {quoted_table_name}
-                        WHERE {quoted_column_name} IS NOT NULL
-                        GROUP BY {quoted_column_name}
-                        ORDER BY _freq DESC
-                        LIMIT :num_rows
-                    ) as subquery;
-                """
-                try:
-                    result = connection.execute(text(query_str), {"num_rows": number_of_rows})
-                    values = [row[0] for row in result]
-                    most_frequent_values[column_name] = values
-                except SQLAlchemyError as e:
-                    logger.error(f"Error fetching frequent values for {column_name} in {table_name}: {e}")
-                    most_frequent_values[column_name] = []
-
-        # Normalize list lengths
-        max_length = 0
-        if most_frequent_values:
-            max_length = max(len(v) for v in most_frequent_values.values()) if most_frequent_values else 0
-
-        for column_name in most_frequent_values:
-            current_len = len(most_frequent_values[column_name])
-            if current_len < max_length:
-                most_frequent_values[column_name].extend([None] * (max_length - current_len))
-
-        return most_frequent_values
-
-
 @register_plugin("mariadb")
 class MariaDBPlugin(DbPlugin):
-    """MariaDB database plugin
+    """MariaDB database plugin."""
 
     plugin_name = "MariaDB Plugin"
     plugin_version = "1.0.0"
@@ -315,11 +77,19 @@ class MariaDBPlugin(DbPlugin):
 
     def create_adapter(self, **kwargs) -> DbAdapter:
         """Create and return a MariaDB adapter instance."""
+        # Map plugin parameters to adapter parameters
+        connection_params = {
+            'host': kwargs.get('host', 'localhost'),
+            'port': kwargs.get('port', 3307),
+            'database': kwargs.get('database') or kwargs.get('dbname'),
+            'user': kwargs.get('user') or kwargs.get('username'),
+            'password': kwargs.get('password')
+        }
+        return MariaDBAdapter(connection_params)
 
     def validate_connection_params(self, **kwargs) -> bool:
         """Validate connection parameters for MariaDB."""
-        required = ['host', '
+        required = ['host', 'user', 'password']
         database = kwargs.get('database') or kwargs.get('dbname')
 
         if not database:
@@ -331,7 +101,7 @@ class MariaDBPlugin(DbPlugin):
             logger.error(f"Missing required parameter: {param}")
             return False
 
-        port = kwargs.get('port')
+        port = kwargs.get('port', 3307)
         if not isinstance(port, int) or not (1 <= port <= 65535):
             logger.error("port must be an integer between 1 and 65535")
             return False
@@ -341,8 +111,8 @@ class MariaDBPlugin(DbPlugin):
     def initialize(self, **kwargs) -> None:
         """Initialize the MariaDB plugin."""
         # Validate and extract parameters
-        self.host = kwargs.get('host')
-        self.port = kwargs.get('port',
+        self.host = kwargs.get('host', 'localhost')
+        self.port = kwargs.get('port', 3307)
         self.dbname = kwargs.get('database') or kwargs.get('dbname')
         self.user = kwargs.get('user') or kwargs.get('username')
         self.password = kwargs.get('password')
@@ -434,3 +204,13 @@ class MariaDBPlugin(DbPlugin):
             return self.adapter.get_example_data(table_name, number_of_rows)
         else:
             raise RuntimeError("Plugin not initialized")
+
+    @classmethod
+    def get_required_parameters(cls) -> List[str]:
+        """Get list of required connection parameters."""
+        return ['host', 'port', 'database', 'user', 'password']
+
+    @classmethod
+    def get_optional_parameters(cls) -> List[str]:
+        """Get list of optional connection parameters."""
+        return ['db_root_path', 'db_mode']
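
After this change the plugin delegates all database work to the shared adapter and only maps its keyword arguments onto the adapter's connection_params. A rough sketch of that flow, assuming the plugin can be constructed without arguments (its constructor is not shown in this diff; all values are placeholders):

    plugin = MariaDBPlugin()
    kwargs = dict(host="localhost", port=3307,
                  dbname="demo", username="demo", password="secret")
    if plugin.validate_connection_params(**kwargs):
        adapter = plugin.create_adapter(**kwargs)   # returns adapters.mariadb.MariaDBAdapter
        adapter.connect()
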
{thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thoth_dbmanager
-Version: 0.5.2
+Version: 0.5.8
 Summary: A Python library for managing SQL databases with support for multiple database types, LSH-based similarity search, and a modern plugin architecture.
 Author-email: Marco Pancotti <mp@tylconsulting.it>
 Project-URL: Homepage, https://github.com/mptyl/thoth_dbmanager
@@ -22,11 +22,11 @@ Classifier: Development Status :: 4 - Beta
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+License-File: LICENSE.md
 Requires-Dist: datasketch>=1.5.0
 Requires-Dist: tqdm>=4.60.0
 Requires-Dist: SQLAlchemy>=1.4.0
 Requires-Dist: pydantic>=2.0.0
-Requires-Dist: pandas>=1.3.0
 Requires-Dist: requests>=2.25.0
 Provides-Extra: postgresql
 Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgresql"
@@ -35,15 +35,10 @@ Requires-Dist: mariadb>=1.1.0; extra == "mariadb"
 Provides-Extra: sqlserver
 Requires-Dist: pyodbc>=4.0.0; extra == "sqlserver"
 Provides-Extra: sqlite
-Provides-Extra: embeddings
-Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
-Requires-Dist: numpy>=1.21.0; extra == "embeddings"
 Provides-Extra: all
 Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
 Requires-Dist: mariadb>=1.1.0; extra == "all"
 Requires-Dist: pyodbc>=4.0.0; extra == "all"
-Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
-Requires-Dist: numpy>=1.21.0; extra == "all"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
{thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/RECORD
CHANGED
@@ -1,15 +1,15 @@
 thoth_dbmanager/ThothDbManager.py,sha256=q-jctgt3MJCDFzq6icQdP1oLeVy1ypg402F4ybxhG8c,8943
-thoth_dbmanager/__init__.py,sha256=
+thoth_dbmanager/__init__.py,sha256=tsW5bAgsXd4m2bZmcQZ5G8NHoSg-F4h1tiXwpXFAFXs,1655
 thoth_dbmanager/documents.py,sha256=z-f7zo_CZHqoGM0qHT8-lSUx4NhnMNZTSajpoFtRxn4,5051
 thoth_dbmanager/dynamic_imports.py,sha256=xDahgiqKvwSYqjPgHiQqD1XPhAbM_JqnU3OhBp2N-fc,7013
-thoth_dbmanager/adapters/__init__.py,sha256=
-thoth_dbmanager/adapters/mariadb.py,sha256=
+thoth_dbmanager/adapters/__init__.py,sha256=5q15dASHBVqsoNj-l1t371VtfuBjhxDAhW68COYI6QI,999
+thoth_dbmanager/adapters/mariadb.py,sha256=PkFHUDism4X_P5W26NDsxdK2T_rmKlPHdwALOXWMExY,15301
 thoth_dbmanager/adapters/postgresql.py,sha256=qxdlxOV7Nvn8U4Lhat50w87Z2S8AzBfmLfEwKfz7dis,17299
 thoth_dbmanager/adapters/sqlite.py,sha256=RTDszgnAtkE14LKFeoe9lBHgsqXqkmDk6jDCTmVpnoM,14659
-thoth_dbmanager/adapters/sqlserver.py,sha256=
+thoth_dbmanager/adapters/sqlserver.py,sha256=7RGZ3qQTJCfVyvjOj4jQ9G2NAD9vtmzcho4Dbi3is_o,24557
 thoth_dbmanager/core/__init__.py,sha256=FlqNW0GZNv1rnwNgyXGzveLqaw0Z90y5AKhR_1DvHBE,269
-thoth_dbmanager/core/factory.py,sha256=
-thoth_dbmanager/core/interfaces.py,sha256=
+thoth_dbmanager/core/factory.py,sha256=zPqyo2kVPvSsjKrzfyg5rEwqKpay6RmymQ1NZr15Onw,8958
+thoth_dbmanager/core/interfaces.py,sha256=s6t-8w4QWu_4Dl654LAU2p3Ao34wjeNaGEsUOJwYHaM,9575
 thoth_dbmanager/core/registry.py,sha256=url4qpQMoMw4rDrdAAvV6L7-NdO4z86xSJPSwTH_l5g,8624
 thoth_dbmanager/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thoth_dbmanager/helpers/multi_db_generator.py,sha256=frN0SZtWAfeojoJFLs4XLR3ri6h9pHYc-2O4aLAOlbo,23238
@@ -21,13 +21,14 @@ thoth_dbmanager/lsh/core.py,sha256=171FqHW7ItAqAPk6g_AoayKTE3Bs1rRZxnt55MJVzjY,6
 thoth_dbmanager/lsh/factory.py,sha256=2Bpkk-OygjaptZAw1yysxO1cxG3QTxmJ1yFGcXHqX3w,2411
 thoth_dbmanager/lsh/manager.py,sha256=LGrKbGKiBuISlNXaU4Yxfc_BqJfN27MaXapJbzEAjJQ,6513
 thoth_dbmanager/lsh/storage.py,sha256=qei6fwpmRCBSS8CRtDlnZCuWEmyuOK9gVSTkEJdX0eI,4543
-thoth_dbmanager/plugins/__init__.py,sha256=
-thoth_dbmanager/plugins/mariadb.py,sha256=
+thoth_dbmanager/plugins/__init__.py,sha256=J_ojrJNK1_xeAXbmk8UdiYfqG9vCoD6OI99QxajFM3E,1157
+thoth_dbmanager/plugins/mariadb.py,sha256=p4ey4_bfgAogLaM9lWgvFiWNrX-tElKSnnT0t6o-xo0,8212
 thoth_dbmanager/plugins/postgresql.py,sha256=pI1W9oHpQty8tHMoEDcsOT-Msv6S4aoFcArOGFxLR7Q,5518
 thoth_dbmanager/plugins/sqlite.py,sha256=gkgZ6-Vjkab0IP3ffHOg4bbpDHsjO_N4DesUnSDNAmQ,8857
 thoth_dbmanager/plugins/sqlserver.py,sha256=mMb3F5FmSWV02FZwj-Ult-2TjuyeVA4Fl1iME1dbgLU,5289
-thoth_dbmanager-0.5.
-thoth_dbmanager-0.5.
-thoth_dbmanager-0.5.
-thoth_dbmanager-0.5.
-thoth_dbmanager-0.5.
+thoth_dbmanager-0.5.8.dist-info/licenses/LICENSE,sha256=81-BOzGgwtY1XdYfkwMQB87AkOGXI9OMq0kjNcZA4UE,1071
+thoth_dbmanager-0.5.8.dist-info/licenses/LICENSE.md,sha256=TtKT2ej3GRki3W8FmSeYzRuQtZ1oMGWchpWykZ4vA7c,1070
+thoth_dbmanager-0.5.8.dist-info/METADATA,sha256=clF9gFLahTJJX9HxDYK8kiZgAsgKwoHwjV0qvhpdmOY,14777
+thoth_dbmanager-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+thoth_dbmanager-0.5.8.dist-info/top_level.txt,sha256=b9ttxm9RUc0KUCASEKRx6FqoREYJ1-KZWSpNuaM0uQ4,16
+thoth_dbmanager-0.5.8.dist-info/RECORD,,
thoth_dbmanager-0.5.8.dist-info/licenses/LICENSE.md
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Marco Pancotti
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
{thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/WHEEL
File without changes
{thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/licenses/LICENSE
File without changes
{thoth_dbmanager-0.5.2.dist-info → thoth_dbmanager-0.5.8.dist-info}/top_level.txt
File without changes