thoth-dbmanager 0.5.3__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thoth_dbmanager/ThothDbManager.py +14 -0
- thoth_dbmanager/__init__.py +15 -1
- thoth_dbmanager/adapters/__init__.py +44 -6
- thoth_dbmanager/adapters/mariadb.py +361 -129
- thoth_dbmanager/adapters/postgresql.py +49 -23
- thoth_dbmanager/adapters/sqlite.py +14 -1
- thoth_dbmanager/adapters/sqlserver.py +38 -9
- thoth_dbmanager/core/__init__.py +14 -0
- thoth_dbmanager/core/factory.py +17 -0
- thoth_dbmanager/core/interfaces.py +14 -0
- thoth_dbmanager/core/registry.py +14 -0
- thoth_dbmanager/documents.py +14 -0
- thoth_dbmanager/dynamic_imports.py +14 -0
- thoth_dbmanager/helpers/__init__.py +13 -0
- thoth_dbmanager/helpers/multi_db_generator.py +14 -0
- thoth_dbmanager/helpers/preprocess_values.py +14 -0
- thoth_dbmanager/helpers/schema.py +14 -0
- thoth_dbmanager/helpers/search.py +14 -0
- thoth_dbmanager/lsh/__init__.py +14 -0
- thoth_dbmanager/lsh/core.py +14 -0
- thoth_dbmanager/lsh/factory.py +14 -0
- thoth_dbmanager/lsh/manager.py +14 -0
- thoth_dbmanager/lsh/storage.py +14 -0
- thoth_dbmanager/plugins/__init__.py +47 -8
- thoth_dbmanager/plugins/mariadb.py +41 -251
- thoth_dbmanager/plugins/postgresql.py +14 -0
- thoth_dbmanager/plugins/sqlite.py +14 -0
- thoth_dbmanager/plugins/sqlserver.py +14 -0
- {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/METADATA +2 -1
- thoth_dbmanager-0.5.9.dist-info/RECORD +34 -0
- thoth_dbmanager-0.5.9.dist-info/licenses/LICENSE.md +21 -0
- thoth_dbmanager-0.5.3.dist-info/RECORD +0 -33
- {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/WHEEL +0 -0
- {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/licenses/LICENSE +0 -0
- {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,17 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
import logging
|
2
16
|
from abc import ABC, abstractmethod
|
3
17
|
from pathlib import Path
|
thoth_dbmanager/__init__.py
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
"""
|
2
16
|
Thoth Database Manager - A unified interface for multiple database systems.
|
3
17
|
|
@@ -72,4 +86,4 @@ __all__ = [
|
|
72
86
|
"DatabaseImportError",
|
73
87
|
]
|
74
88
|
|
75
|
-
__version__ = "0.5.
|
89
|
+
__version__ = "0.5.7"
|
@@ -1,15 +1,53 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
"""
|
2
16
|
Database adapters for Thoth SQL Database Manager.
|
3
17
|
"""
|
4
18
|
|
5
|
-
|
19
|
+
import logging
|
20
|
+
|
21
|
+
logger = logging.getLogger(__name__)
|
22
|
+
|
23
|
+
# Always available adapter (SQLite is built into Python)
|
6
24
|
from .sqlite import SQLiteAdapter
|
7
|
-
from .mariadb import MariaDBAdapter
|
8
|
-
from .sqlserver import SQLServerAdapter
|
9
25
|
|
10
26
|
__all__ = [
|
11
|
-
"PostgreSQLAdapter",
|
12
27
|
"SQLiteAdapter",
|
13
|
-
"MariaDBAdapter",
|
14
|
-
"SQLServerAdapter",
|
15
28
|
]
|
29
|
+
|
30
|
+
# Optional adapters - only import if dependencies are available
|
31
|
+
try:
|
32
|
+
import psycopg2
|
33
|
+
from .postgresql import PostgreSQLAdapter
|
34
|
+
__all__.append("PostgreSQLAdapter")
|
35
|
+
except ImportError:
|
36
|
+
logger.debug("psycopg2 not installed, PostgreSQLAdapter not available")
|
37
|
+
PostgreSQLAdapter = None
|
38
|
+
|
39
|
+
try:
|
40
|
+
import mariadb
|
41
|
+
from .mariadb import MariaDBAdapter
|
42
|
+
__all__.append("MariaDBAdapter")
|
43
|
+
except ImportError:
|
44
|
+
logger.debug("MariaDB connector not installed, MariaDBAdapter not available")
|
45
|
+
MariaDBAdapter = None
|
46
|
+
|
47
|
+
try:
|
48
|
+
import pyodbc
|
49
|
+
from .sqlserver import SQLServerAdapter
|
50
|
+
__all__.append("SQLServerAdapter")
|
51
|
+
except ImportError:
|
52
|
+
logger.debug("pyodbc not installed, SQLServerAdapter not available")
|
53
|
+
SQLServerAdapter = None
|
@@ -1,165 +1,397 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
"""
|
2
|
-
MariaDB adapter
|
16
|
+
MariaDB adapter implementation.
|
3
17
|
"""
|
4
|
-
|
5
|
-
from typing import Any, Dict, List, Optional
|
6
|
-
|
7
|
-
from sqlalchemy
|
18
|
+
import logging
|
19
|
+
from typing import Any, Dict, List, Optional, Union
|
20
|
+
import mariadb
|
21
|
+
from sqlalchemy import create_engine, text, inspect
|
8
22
|
from sqlalchemy.exc import SQLAlchemyError
|
9
23
|
|
10
24
|
from ..core.interfaces import DbAdapter
|
25
|
+
from ..documents import (
|
26
|
+
TableDocument,
|
27
|
+
ColumnDocument,
|
28
|
+
SchemaDocument,
|
29
|
+
ForeignKeyDocument,
|
30
|
+
IndexDocument
|
31
|
+
)
|
32
|
+
|
33
|
+
logger = logging.getLogger(__name__)
|
11
34
|
|
12
35
|
|
13
36
|
class MariaDBAdapter(DbAdapter):
|
14
|
-
"""
|
37
|
+
"""
|
38
|
+
MariaDB database adapter implementation.
|
39
|
+
"""
|
15
40
|
|
16
|
-
def __init__(self,
|
17
|
-
|
18
|
-
Initialize MariaDB adapter.
|
19
|
-
|
20
|
-
Args:
|
21
|
-
connection_string: MariaDB connection string
|
22
|
-
**kwargs: Additional connection parameters
|
23
|
-
"""
|
24
|
-
self.connection_string = connection_string
|
41
|
+
def __init__(self, connection_params: Dict[str, Any]):
    """Store the connection settings; no connection is opened here.

    Args:
        connection_params: Mapping of connection settings. The keys read
            here are ``host``, ``port``, ``database``, ``user`` and
            ``password``; the whole mapping is also handed to the base
            class initializer.
    """
    super().__init__(connection_params)

    # Connection handles stay empty until connect() fills them in.
    self.engine = None
    self.raw_connection = None

    lookup = connection_params.get
    self.host = lookup('host', 'localhost')
    # NOTE(review): the fallback port is 3307, whereas MariaDB servers
    # conventionally listen on 3306 - confirm this default is intentional.
    self.port = lookup('port', 3307)
    self.database = lookup('database')
    self.user = lookup('user')
    self.password = lookup('password')
|
50
|
+
|
28
51
|
def connect(self) -> None:
    """Open the SQLAlchemy engine and the raw ``mariadb`` connection.

    The engine is verified with ``SELECT 1`` before the raw connection is
    opened. ``self.engine`` and ``self.raw_connection`` are only assigned
    once every step has succeeded, so a failed attempt never leaves a
    half-initialised adapter behind (other methods use ``self.engine`` as
    their "is connected" check).

    Raises:
        Exception: Whatever the URL builder, SQLAlchemy or the MariaDB
            connector raised; the error is logged and re-raised unchanged.
    """
    engine = None
    try:
        # Create SQLAlchemy engine
        engine = create_engine(self._build_connection_string(), echo=False)

        # Test connection
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))

        # Also create raw mariadb connection for specific operations
        raw_connection = mariadb.connect(
            host=self.host,
            port=self.port,
            database=self.database,
            user=self.user,
            password=self.password,
        )
    except Exception as e:
        logger.error(f"Failed to connect to MariaDB: {e}")
        if engine is not None:
            # Release the pool created by this attempt; a cleanup failure
            # must not hide the original connection error.
            try:
                engine.dispose()
            except Exception as cleanup_error:
                logger.debug(f"Error disposing MariaDB engine after failed connect: {cleanup_error}")
        raise

    self.engine = engine
    self.raw_connection = raw_connection
    self._initialized = True
    logger.info("MariaDB connection established successfully")
|
38
77
|
|
39
78
|
def disconnect(self) -> None:
    """Close the engine and the raw connection, logging any errors.

    Both handles are detached from the adapter and ``_initialized`` is
    cleared before anything is closed, and each resource is released
    independently. A failure while disposing the engine therefore no
    longer prevents the raw connection from being closed. As before,
    errors are logged and not raised.
    """
    engine, self.engine = self.engine, None
    raw_connection, self.raw_connection = self.raw_connection, None
    self._initialized = False

    closed_cleanly = True

    if engine:
        try:
            engine.dispose()
        except Exception as e:
            closed_cleanly = False
            logger.error(f"Error closing MariaDB connection: {e}")

    if raw_connection:
        try:
            raw_connection.close()
        except Exception as e:
            closed_cleanly = False
            logger.error(f"Error closing MariaDB connection: {e}")

    if closed_cleanly:
        logger.info("MariaDB connection closed")
|
94
|
+
|
95
|
+
def _build_connection_string(self) -> str:
    """Build the SQLAlchemy URL for the ``mariadb+mariadbconnector`` dialect.

    The user name and password are percent-encoded so that characters
    that are reserved in URLs (``@``, ``:``, ``/``, ``%`` ...) cannot
    change how the URL is parsed.

    Returns:
        The connection URL built from ``user``, ``password``, ``host``,
        ``port`` and ``database``.

    Raises:
        ValueError: If ``database``, ``user`` or ``password`` is missing
            or empty.
    """
    from urllib.parse import quote

    if not all([self.database, self.user, self.password]):
        raise ValueError("Missing required connection parameters: database, user, password")

    # safe="" encodes every reserved character, including "/" and "+",
    # so the value round-trips through percent-decoding unchanged.
    user = quote(str(self.user), safe="")
    password = quote(str(self.password), safe="")

    return f"mariadb+mariadbconnector://{user}:{password}@{self.host}:{self.port}/{self.database}"
|
44
102
|
|
45
|
-
def execute_query(self, query: str, params: Optional[Dict[str,
|
46
|
-
"""Execute
|
103
|
+
def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
    """Execute a SQL statement on a fresh connection from the engine.

    Args:
        query: SQL text; ``:name`` placeholders are bound from ``params``.
        params: Optional mapping of bind parameters.
        fetch: ``"all"`` for every row, ``"one"`` for the first row (or
            ``None`` when there is none), or an ``int`` for at most that
            many rows. Only used for row-returning statements.
        timeout: Statement time limit in seconds, applied through the
            session variable ``max_statement_time``.

    Returns:
        For statements starting with SELECT/WITH/SHOW/DESCRIBE: a list of
        row dicts, a single row dict, or ``None`` (including when
        ``fetch`` is not a supported mode). For any other statement the
        transaction is committed and the affected row count is returned.

    Raises:
        RuntimeError: If the adapter is not connected.
        ValueError: If ``timeout`` is not a finite, non-negative number.
        SQLAlchemyError: If execution fails (logged, then re-raised).
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    # The limit is spliced into a SET statement, so it must be a plain
    # number: coercing it here keeps arbitrary text out of the SQL.
    try:
        statement_limit = float(timeout)
    except (TypeError, ValueError) as exc:
        raise ValueError(f"timeout must be a number of seconds, got {timeout!r}") from exc
    if not 0 <= statement_limit < float("inf"):
        raise ValueError(f"timeout must be a finite, non-negative number of seconds, got {timeout!r}")

    returns_rows = query.strip().upper().startswith(('SELECT', 'WITH', 'SHOW', 'DESCRIBE'))

    try:
        with self.engine.connect() as conn:
            # Abort statements on this session that run longer than the limit.
            conn.execute(text(f"SET SESSION max_statement_time = {statement_limit}"))

            # Execute query
            statement = text(query)
            result = conn.execute(statement, params) if params else conn.execute(statement)

            if not returns_rows:
                # For INSERT, UPDATE, DELETE
                conn.commit()
                return result.rowcount

            # Handle different fetch modes
            if fetch == "all":
                return [dict(row._mapping) for row in result]
            if fetch == "one":
                row = result.first()
                return dict(row._mapping) if row else None
            if isinstance(fetch, int):
                return [dict(row._mapping) for row in result.fetchmany(fetch)]

            # Unsupported fetch mode: keep the historical "no result" answer.
            return None

    except SQLAlchemyError as e:
        logger.error(f"MariaDB query execution failed: {e}")
        raise
|
56
138
|
|
57
|
-
def
|
58
|
-
"""
|
139
|
+
def get_tables_as_documents(self) -> List[TableDocument]:
    """Describe every table reported by the SQLAlchemy inspector.

    For each table the row count (``COUNT(*)``), the number of columns
    and the table comment are collected. The comment lookup is best
    effort: if it fails the description is left empty.

    Returns:
        One ``TableDocument`` per table name returned by the inspector.

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Any other failure is logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    try:
        inspector = inspect(self.engine)
        tables = []

        for table_name in inspector.get_table_names():
            # Backtick-quote the identifier (doubling embedded backticks)
            # so reserved words or unusual names do not break the query.
            quoted_table = "`" + table_name.replace("`", "``") + "`"

            # Get row count
            count_result = self.execute_query(f"SELECT COUNT(*) as count FROM {quoted_table}", fetch="one")
            row_count = count_result.get('count', 0) if count_result else 0

            # Get column count
            columns = inspector.get_columns(table_name)

            # Get table comment (if available). The name is passed as a
            # bind parameter and the lookup is limited to the current
            # database so a same-named table elsewhere is not picked up.
            table_comment = ""
            try:
                comment_result = self.execute_query(
                    "SELECT table_comment AS table_comment "
                    "FROM information_schema.tables "
                    "WHERE table_schema = DATABASE() AND table_name = :table_name",
                    params={"table_name": table_name},
                    fetch="one"
                )
                if comment_result:
                    table_comment = comment_result.get('table_comment', '') or ''
            except Exception as e:
                logger.debug(f"Could not get table comment for {table_name}: {e}")

            tables.append(TableDocument(
                table_name=table_name,
                table_type="TABLE",
                row_count=row_count,
                column_count=len(columns),
                description=table_comment
            ))

        return tables

    except Exception as e:
        logger.error(f"Error getting tables as documents: {e}")
        raise
|
75
180
|
|
76
|
-
def
|
77
|
-
"""
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
schema = {
|
82
|
-
'table_name': table_name,
|
83
|
-
'columns': []
|
84
|
-
}
|
181
|
+
def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
    """Describe the columns of ``table_name`` as ``ColumnDocument`` objects.

    Column metadata comes from the SQLAlchemy inspector. After the
    documents are built, every column named in the table's primary-key
    constraint is flagged with ``is_pk = True``.

    Args:
        table_name: Table whose columns are reflected.

    Returns:
        One ``ColumnDocument`` per reflected column.

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Any reflection failure is logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    try:
        inspector = inspect(self.engine)

        documents = [
            ColumnDocument(
                table_name=table_name,
                column_name=meta['name'],
                data_type=str(meta['type']),
                is_nullable=meta.get('nullable', True),
                column_default=meta.get('default'),
                is_pk=meta.get('primary_key', False),
                column_comment=meta.get('comment', '')
            )
            for meta in inspector.get_columns(table_name)
        ]

        # The primary-key constraint is the authoritative source for is_pk.
        pk_info = inspector.get_pk_constraint(table_name)
        key_columns = []
        if pk_info and pk_info.get('constrained_columns'):
            key_columns = pk_info['constrained_columns']

        for document in documents:
            if document.column_name in key_columns:
                document.is_pk = True

        return documents

    except Exception as e:
        logger.error(f"Error getting columns as documents: {e}")
        raise
|
214
|
+
|
215
|
+
def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
    """List every foreign-key column pair in the inspected tables.

    A constraint spanning several columns yields one document per
    constrained column. The referenced column is taken from the same
    position in ``referred_columns``, or is ``None`` when that list is
    shorter.

    Returns:
        One ``ForeignKeyDocument`` per constrained column.

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Any reflection failure is logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    try:
        inspector = inspect(self.engine)
        documents = []

        for owner_table in inspector.get_table_names():
            for constraint in inspector.get_foreign_keys(owner_table):
                for position, local_column in enumerate(constraint['constrained_columns']):
                    documents.append(ForeignKeyDocument(
                        constraint_name=constraint['name'],
                        table_name=owner_table,
                        column_name=local_column,
                        foreign_table_name=constraint['referred_table'],
                        foreign_column_name=(
                            constraint['referred_columns'][position]
                            if position < len(constraint['referred_columns'])
                            else None
                        )
                    ))

        return documents

    except Exception as e:
        logger.error(f"Error getting foreign keys as documents: {e}")
        raise
|
96
241
|
|
97
|
-
def
|
98
|
-
"""
|
99
|
-
|
100
|
-
|
242
|
+
def get_schemas_as_documents(self) -> List[SchemaDocument]:
    """Describe the current database as a single schema document.

    The database name is read with ``SELECT DATABASE()`` and used as both
    catalog and schema name, falling back to the configured database
    when the query yields no row. The table count is taken directly from
    the inspector's table names; this avoids building full table
    documents (a ``COUNT(*)`` per table) only to count them.

    Returns:
        A one-element list with the ``SchemaDocument`` for the current
        database.

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Any other failure is logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    try:
        # Get current database as schema
        result = self.execute_query("SELECT DATABASE() as db_name", fetch="one")
        current_db = result.get('db_name') if result else self.database

        # Get table count for current database
        table_count = len(inspect(self.engine).get_table_names())

        return [SchemaDocument(
            catalog_name=current_db,
            schema_name=current_db,
            schema_owner=self.user,
            table_count=table_count
        )]

    except Exception as e:
        logger.error(f"Error getting schemas as documents: {e}")
        raise
|
266
|
+
|
267
|
+
def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
    """Describe indexes as ``IndexDocument`` objects.

    Args:
        table_name: Restrict the result to this table; when omitted (or
            empty) every table reported by the inspector is processed.

    Returns:
        One ``IndexDocument`` per reflected index. ``index_type`` is
        always reported as ``'BTREE'``; the actual type is not reflected.

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Any reflection failure is logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    try:
        inspector = inspect(self.engine)

        # Either the single requested table or everything the inspector sees.
        if table_name:
            target_tables = [table_name]
        else:
            target_tables = inspector.get_table_names()

        return [
            IndexDocument(
                table_name=current_table,
                index_name=index_info['name'],
                column_names=index_info['column_names'],
                is_unique=index_info.get('unique', False),
                index_type='BTREE'  # MariaDB default
            )
            for current_table in target_tables
            for index_info in inspector.get_indexes(current_table)
        ]

    except Exception as e:
        logger.error(f"Error getting indexes as documents: {e}")
        raise
|
112
294
|
|
113
|
-
def
|
114
|
-
"""Get foreign key information for a table."""
|
115
|
-
query = f"""
|
116
|
-
SELECT
|
117
|
-
CONSTRAINT_NAME as name,
|
118
|
-
COLUMN_NAME as column_name,
|
119
|
-
REFERENCED_TABLE_NAME as referenced_table,
|
120
|
-
REFERENCED_COLUMN_NAME as referenced_column
|
121
|
-
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
|
122
|
-
WHERE TABLE_NAME = '{table_name}'
|
123
|
-
AND REFERENCED_TABLE_NAME IS NOT NULL
|
295
|
+
def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
    """Collect up to 100 distinct values per text-like or integer column.

    Only columns whose reflected type name contains VARCHAR, CHAR, TEXT,
    INT or ENUM are sampled. NULLs are dropped and values are converted
    to ``str``. A column whose query fails is skipped (logged at debug
    level); columns with no values are left out of the result.

    Returns:
        Dict[str, Dict[str, List[str]]]: Dictionary where:
            - outer key is table name (present for every table)
            - inner key is column name
            - value is list of unique values

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Any reflection failure is logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    def quote_identifier(name: str) -> str:
        # Double embedded backticks so the name cannot end the quoting early.
        return "`" + str(name).replace("`", "``") + "`"

    sampled_types = ['VARCHAR', 'CHAR', 'TEXT', 'INT', 'ENUM']

    try:
        inspector = inspect(self.engine)
        unique_values = {}

        for table_name in inspector.get_table_names():
            unique_values[table_name] = {}

            for col in inspector.get_columns(table_name):
                col_name = col['name']
                # Only get unique values for reasonable data types
                col_type = str(col['type']).upper()

                if not any(t in col_type for t in sampled_types):
                    continue

                try:
                    query = (
                        f"SELECT DISTINCT {quote_identifier(col_name)} "
                        f"FROM {quote_identifier(table_name)} LIMIT 100"
                    )
                    result = self.execute_query(query)

                    values = [
                        str(val)
                        for val in (row.get(col_name) for row in result)
                        if val is not None
                    ]

                    if values:
                        unique_values[table_name][col_name] = values

                except Exception as e:
                    logger.debug(f"Could not get unique values for {table_name}.{col_name}: {e}")
                    continue

        return unique_values

    except Exception as e:
        logger.error(f"Error getting unique values: {e}")
        raise
|
148
343
|
|
149
|
-
def
|
150
|
-
"""Check if a table exists."""
|
151
|
-
query = f"""
|
152
|
-
SELECT COUNT(*) as count
|
153
|
-
FROM INFORMATION_SCHEMA.TABLES
|
154
|
-
WHERE TABLE_NAME = '{table_name}'
|
344
|
+
def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
    """
    Get example data (most frequent values) for each column in a table.

    Columns whose reflected type name contains BLOB, BINARY or IMAGE are
    not queried and map to an empty list, as does any column whose query
    fails (logged at debug level).

    Args:
        table_name (str): The name of the table.
        number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.

    Returns:
        Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.

    Raises:
        RuntimeError: If the adapter is not connected.
        Exception: Reflection failures, or a ``number_of_rows`` that
            cannot be converted to ``int``, are logged and re-raised.
    """
    if not self.engine:
        raise RuntimeError("Not connected to database")

    def quote_identifier(name: str) -> str:
        # Double embedded backticks so the name cannot end the quoting early.
        return "`" + str(name).replace("`", "``") + "`"

    try:
        # LIMIT is spliced into the SQL text, so force it to be an integer.
        row_limit = int(number_of_rows)
        quoted_table = quote_identifier(table_name)

        inspector = inspect(self.engine)
        columns = inspector.get_columns(table_name)

        example_data = {}

        for col in columns:
            col_name = col['name']
            col_type = str(col['type']).upper()

            # Skip blob/binary columns
            if any(t in col_type for t in ['BLOB', 'BINARY', 'IMAGE']):
                example_data[col_name] = []
                continue

            quoted_column = quote_identifier(col_name)

            try:
                # Get most frequent values
                query = f"""
                SELECT {quoted_column}, COUNT(*) as freq
                FROM {quoted_table}
                WHERE {quoted_column} IS NOT NULL
                GROUP BY {quoted_column}
                ORDER BY freq DESC
                LIMIT {row_limit}
                """

                result = self.execute_query(query)
                example_data[col_name] = [row[col_name] for row in result]

            except Exception as e:
                logger.debug(f"Could not get example data for {table_name}.{col_name}: {e}")
                example_data[col_name] = []

        return example_data

    except Exception as e:
        logger.error(f"Error getting example data: {e}")
        raise
|