thoth-dbmanager 0.5.3__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. thoth_dbmanager/ThothDbManager.py +14 -0
  2. thoth_dbmanager/__init__.py +15 -1
  3. thoth_dbmanager/adapters/__init__.py +44 -6
  4. thoth_dbmanager/adapters/mariadb.py +361 -129
  5. thoth_dbmanager/adapters/postgresql.py +49 -23
  6. thoth_dbmanager/adapters/sqlite.py +14 -1
  7. thoth_dbmanager/adapters/sqlserver.py +38 -9
  8. thoth_dbmanager/core/__init__.py +14 -0
  9. thoth_dbmanager/core/factory.py +17 -0
  10. thoth_dbmanager/core/interfaces.py +14 -0
  11. thoth_dbmanager/core/registry.py +14 -0
  12. thoth_dbmanager/documents.py +14 -0
  13. thoth_dbmanager/dynamic_imports.py +14 -0
  14. thoth_dbmanager/helpers/__init__.py +13 -0
  15. thoth_dbmanager/helpers/multi_db_generator.py +14 -0
  16. thoth_dbmanager/helpers/preprocess_values.py +14 -0
  17. thoth_dbmanager/helpers/schema.py +14 -0
  18. thoth_dbmanager/helpers/search.py +14 -0
  19. thoth_dbmanager/lsh/__init__.py +14 -0
  20. thoth_dbmanager/lsh/core.py +14 -0
  21. thoth_dbmanager/lsh/factory.py +14 -0
  22. thoth_dbmanager/lsh/manager.py +14 -0
  23. thoth_dbmanager/lsh/storage.py +14 -0
  24. thoth_dbmanager/plugins/__init__.py +47 -8
  25. thoth_dbmanager/plugins/mariadb.py +41 -251
  26. thoth_dbmanager/plugins/postgresql.py +14 -0
  27. thoth_dbmanager/plugins/sqlite.py +14 -0
  28. thoth_dbmanager/plugins/sqlserver.py +14 -0
  29. {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/METADATA +2 -1
  30. thoth_dbmanager-0.5.9.dist-info/RECORD +34 -0
  31. thoth_dbmanager-0.5.9.dist-info/licenses/LICENSE.md +21 -0
  32. thoth_dbmanager-0.5.3.dist-info/RECORD +0 -33
  33. {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/WHEEL +0 -0
  34. {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/licenses/LICENSE +0 -0
  35. {thoth_dbmanager-0.5.3.dist-info → thoth_dbmanager-0.5.9.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import logging
2
16
  from abc import ABC, abstractmethod
3
17
  from pathlib import Path
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  """
2
16
  Thoth Database Manager - A unified interface for multiple database systems.
3
17
 
@@ -72,4 +86,4 @@ __all__ = [
72
86
  "DatabaseImportError",
73
87
  ]
74
88
 
75
- __version__ = "0.5.0"
89
+ __version__ = "0.5.7"
@@ -1,15 +1,53 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  """
2
16
  Database adapters for Thoth SQL Database Manager.
3
17
  """
4
18
 
5
- from .postgresql import PostgreSQLAdapter
19
+ import logging
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Always available adapter (SQLite is built into Python)
6
24
  from .sqlite import SQLiteAdapter
7
- from .mariadb import MariaDBAdapter
8
- from .sqlserver import SQLServerAdapter
9
25
 
10
26
  __all__ = [
11
- "PostgreSQLAdapter",
12
27
  "SQLiteAdapter",
13
- "MariaDBAdapter",
14
- "SQLServerAdapter",
15
28
  ]
29
+
30
+ # Optional adapters - only import if dependencies are available
31
+ try:
32
+ import psycopg2
33
+ from .postgresql import PostgreSQLAdapter
34
+ __all__.append("PostgreSQLAdapter")
35
+ except ImportError:
36
+ logger.debug("psycopg2 not installed, PostgreSQLAdapter not available")
37
+ PostgreSQLAdapter = None
38
+
39
+ try:
40
+ import mariadb
41
+ from .mariadb import MariaDBAdapter
42
+ __all__.append("MariaDBAdapter")
43
+ except ImportError:
44
+ logger.debug("MariaDB connector not installed, MariaDBAdapter not available")
45
+ MariaDBAdapter = None
46
+
47
+ try:
48
+ import pyodbc
49
+ from .sqlserver import SQLServerAdapter
50
+ __all__.append("SQLServerAdapter")
51
+ except ImportError:
52
+ logger.debug("pyodbc not installed, SQLServerAdapter not available")
53
+ SQLServerAdapter = None
@@ -1,165 +1,397 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  """
2
- MariaDB adapter for Thoth SQL Database Manager.
16
+ MariaDB adapter implementation.
3
17
  """
4
-
5
- from typing import Any, Dict, List, Optional
6
- from sqlalchemy import create_engine, text
7
- from sqlalchemy.engine import Engine
18
+ import logging
19
+ from typing import Any, Dict, List, Optional, Union
20
+ import mariadb
21
+ from sqlalchemy import create_engine, text, inspect
8
22
  from sqlalchemy.exc import SQLAlchemyError
9
23
 
10
24
  from ..core.interfaces import DbAdapter
25
+ from ..documents import (
26
+ TableDocument,
27
+ ColumnDocument,
28
+ SchemaDocument,
29
+ ForeignKeyDocument,
30
+ IndexDocument
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
11
34
 
12
35
 
13
36
  class MariaDBAdapter(DbAdapter):
14
- """MariaDB database adapter."""
37
+ """
38
+ MariaDB database adapter implementation.
39
+ """
15
40
 
16
- def __init__(self, connection_string: str, **kwargs: Any) -> None:
17
- """
18
- Initialize MariaDB adapter.
19
-
20
- Args:
21
- connection_string: MariaDB connection string
22
- **kwargs: Additional connection parameters
23
- """
24
- self.connection_string = connection_string
41
+ def __init__(self, connection_params: Dict[str, Any]):
42
+ super().__init__(connection_params)
25
43
  self.engine = None
26
- self.connection_params = kwargs
27
-
44
+ self.raw_connection = None
45
+ self.host = connection_params.get('host', 'localhost')
46
+ self.port = connection_params.get('port', 3307)
47
+ self.database = connection_params.get('database')
48
+ self.user = connection_params.get('user')
49
+ self.password = connection_params.get('password')
50
+
28
51
  def connect(self) -> None:
29
- """Establish database connection."""
52
+ """Establish MariaDB connection"""
30
53
  try:
31
- self.engine = create_engine(
32
- self.connection_string,
33
- pool_pre_ping=True,
34
- **self.connection_params
54
+ # Create SQLAlchemy engine
55
+ connection_string = self._build_connection_string()
56
+ self.engine = create_engine(connection_string, echo=False)
57
+
58
+ # Test connection
59
+ with self.engine.connect() as conn:
60
+ conn.execute(text("SELECT 1"))
61
+
62
+ # Also create raw mariadb connection for specific operations
63
+ self.raw_connection = mariadb.connect(
64
+ host=self.host,
65
+ port=self.port,
66
+ database=self.database,
67
+ user=self.user,
68
+ password=self.password
35
69
  )
70
+
71
+ self._initialized = True
72
+ logger.info("MariaDB connection established successfully")
73
+
36
74
  except Exception as e:
37
- raise ConnectionError(f"Failed to connect to MariaDB: {e}")
75
+ logger.error(f"Failed to connect to MariaDB: {e}")
76
+ raise
38
77
 
39
78
  def disconnect(self) -> None:
40
- """Close database connection."""
41
- if self.engine:
42
- self.engine.dispose()
43
- self.engine = None
79
+ """Close MariaDB connection"""
80
+ try:
81
+ if self.engine:
82
+ self.engine.dispose()
83
+ self.engine = None
84
+
85
+ if self.raw_connection:
86
+ self.raw_connection.close()
87
+ self.raw_connection = None
88
+
89
+ self._initialized = False
90
+ logger.info("MariaDB connection closed")
91
+
92
+ except Exception as e:
93
+ logger.error(f"Error closing MariaDB connection: {e}")
94
+
95
+ def _build_connection_string(self) -> str:
96
+ """Build SQLAlchemy connection string for MariaDB"""
97
+ if not all([self.database, self.user, self.password]):
98
+ raise ValueError("Missing required connection parameters: database, user, password")
99
+
100
+ # MariaDB uses mysql+pymysql or mariadb+mariadbconnector dialect
101
+ return f"mariadb+mariadbconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
44
102
 
45
- def execute_query(self, query: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
46
- """Execute a query and return results."""
103
+ def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
104
+ """Execute SQL query"""
47
105
  if not self.engine:
48
- self.connect()
49
-
106
+ raise RuntimeError("Not connected to database")
107
+
50
108
  try:
51
109
  with self.engine.connect() as conn:
52
- result = conn.execute(text(query), params or {})
53
- return [dict(row._mapping) for row in result]
110
+ # MariaDB doesn't have direct query timeout in the same way
111
+ # but we can set connection timeout
112
+ conn.execute(text(f"SET SESSION max_statement_time = {timeout}"))
113
+
114
+ # Execute query
115
+ if params:
116
+ result = conn.execute(text(query), params)
117
+ else:
118
+ result = conn.execute(text(query))
119
+
120
+ # Handle different fetch modes
121
+ if query.strip().upper().startswith(('SELECT', 'WITH', 'SHOW', 'DESCRIBE')):
122
+ if fetch == "all":
123
+ return [dict(row._mapping) for row in result]
124
+ elif fetch == "one":
125
+ row = result.first()
126
+ return dict(row._mapping) if row else None
127
+ elif isinstance(fetch, int):
128
+ rows = result.fetchmany(fetch)
129
+ return [dict(row._mapping) for row in rows]
130
+ else:
131
+ # For INSERT, UPDATE, DELETE
132
+ conn.commit()
133
+ return result.rowcount
134
+
54
135
  except SQLAlchemyError as e:
55
- raise RuntimeError(f"MariaDB query failed: {e}")
136
+ logger.error(f"MariaDB query execution failed: {e}")
137
+ raise
56
138
 
57
- def execute_update(self, query: str, params: Optional[Dict[str, Any]] = None) -> int:
58
- """Execute an update query and return affected row count."""
139
+ def get_tables_as_documents(self) -> List[TableDocument]:
140
+ """Return tables as document objects"""
59
141
  if not self.engine:
60
- self.connect()
61
-
142
+ raise RuntimeError("Not connected to database")
143
+
62
144
  try:
63
- with self.engine.connect() as conn:
64
- result = conn.execute(text(query), params or {})
65
- conn.commit()
66
- return result.rowcount
67
- except SQLAlchemyError as e:
68
- raise RuntimeError(f"MariaDB update failed: {e}")
69
-
70
- def get_tables(self) -> List[str]:
71
- """Get list of tables in the database."""
72
- query = "SHOW TABLES"
73
- result = self.execute_query(query)
74
- return [list(row.values())[0] for row in result]
145
+ inspector = inspect(self.engine)
146
+ tables = []
147
+
148
+ for table_name in inspector.get_table_names():
149
+ # Get row count
150
+ count_result = self.execute_query(f"SELECT COUNT(*) as count FROM {table_name}", fetch="one")
151
+ row_count = count_result.get('count', 0) if count_result else 0
152
+
153
+ # Get column count
154
+ columns = inspector.get_columns(table_name)
155
+
156
+ # Get table comment (if available)
157
+ table_comment = ""
158
+ try:
159
+ comment_result = self.execute_query(
160
+ f"SELECT table_comment FROM information_schema.tables WHERE table_name = '{table_name}'",
161
+ fetch="one"
162
+ )
163
+ table_comment = comment_result.get('table_comment', '') if comment_result else ''
164
+ except:
165
+ pass
166
+
167
+ tables.append(TableDocument(
168
+ table_name=table_name,
169
+ table_type="TABLE",
170
+ row_count=row_count,
171
+ column_count=len(columns),
172
+ description=table_comment
173
+ ))
174
+
175
+ return tables
176
+
177
+ except Exception as e:
178
+ logger.error(f"Error getting tables as documents: {e}")
179
+ raise
75
180
 
76
- def get_table_schema(self, table_name: str) -> Dict[str, Any]:
77
- """Get schema information for a specific table."""
78
- query = f"DESCRIBE {table_name}"
79
- columns = self.execute_query(query)
80
-
81
- schema = {
82
- 'table_name': table_name,
83
- 'columns': []
84
- }
181
+ def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
182
+ """Return columns as document objects"""
183
+ if not self.engine:
184
+ raise RuntimeError("Not connected to database")
85
185
 
86
- for col in columns:
87
- schema['columns'].append({
88
- 'name': col['Field'],
89
- 'type': col['Type'],
90
- 'nullable': col['Null'] == 'YES',
91
- 'default': col['Default'],
92
- 'primary_key': col['Key'] == 'PRI'
93
- })
186
+ try:
187
+ inspector = inspect(self.engine)
188
+ columns = []
189
+
190
+ for col in inspector.get_columns(table_name):
191
+ columns.append(ColumnDocument(
192
+ table_name=table_name,
193
+ column_name=col['name'],
194
+ data_type=str(col['type']),
195
+ is_nullable=col.get('nullable', True),
196
+ column_default=col.get('default'),
197
+ is_pk=col.get('primary_key', False),
198
+ column_comment=col.get('comment', '')
199
+ ))
200
+
201
+ # Mark primary keys
202
+ pk_constraint = inspector.get_pk_constraint(table_name)
203
+ if pk_constraint and pk_constraint.get('constrained_columns'):
204
+ pk_columns = pk_constraint['constrained_columns']
205
+ for col in columns:
206
+ if col.column_name in pk_columns:
207
+ col.is_pk = True
208
+
209
+ return columns
210
+
211
+ except Exception as e:
212
+ logger.error(f"Error getting columns as documents: {e}")
213
+ raise
214
+
215
+ def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
216
+ """Return foreign keys as document objects"""
217
+ if not self.engine:
218
+ raise RuntimeError("Not connected to database")
94
219
 
95
- return schema
220
+ try:
221
+ inspector = inspect(self.engine)
222
+ foreign_keys = []
223
+
224
+ for table_name in inspector.get_table_names():
225
+ for fk in inspector.get_foreign_keys(table_name):
226
+ # Each foreign key can have multiple column pairs
227
+ for i, const_col in enumerate(fk['constrained_columns']):
228
+ foreign_keys.append(ForeignKeyDocument(
229
+ constraint_name=fk['name'],
230
+ table_name=table_name,
231
+ column_name=const_col,
232
+ foreign_table_name=fk['referred_table'],
233
+ foreign_column_name=fk['referred_columns'][i] if i < len(fk['referred_columns']) else None
234
+ ))
235
+
236
+ return foreign_keys
237
+
238
+ except Exception as e:
239
+ logger.error(f"Error getting foreign keys as documents: {e}")
240
+ raise
96
241
 
97
- def get_indexes(self, table_name: str) -> List[Dict[str, Any]]:
98
- """Get index information for a table."""
99
- query = f"SHOW INDEX FROM {table_name}"
100
- indexes = self.execute_query(query)
242
+ def get_schemas_as_documents(self) -> List[SchemaDocument]:
243
+ """Return schemas as document objects"""
244
+ # MariaDB uses database as schema concept
245
+ if not self.engine:
246
+ raise RuntimeError("Not connected to database")
101
247
 
102
- result = []
103
- for idx in indexes:
104
- result.append({
105
- 'name': idx['Key_name'],
106
- 'column': idx['Column_name'],
107
- 'unique': not idx['Non_unique'],
108
- 'type': idx['Index_type']
109
- })
248
+ try:
249
+ # Get current database as schema
250
+ result = self.execute_query("SELECT DATABASE() as db_name", fetch="one")
251
+ current_db = result.get('db_name') if result else self.database
252
+
253
+ # Get table count for current database
254
+ tables = self.get_tables_as_documents()
255
+
256
+ return [SchemaDocument(
257
+ catalog_name=current_db,
258
+ schema_name=current_db,
259
+ schema_owner=self.user,
260
+ table_count=len(tables)
261
+ )]
262
+
263
+ except Exception as e:
264
+ logger.error(f"Error getting schemas as documents: {e}")
265
+ raise
266
+
267
+ def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
268
+ """Return indexes as document objects"""
269
+ if not self.engine:
270
+ raise RuntimeError("Not connected to database")
110
271
 
111
- return result
272
+ try:
273
+ inspector = inspect(self.engine)
274
+ indexes = []
275
+
276
+ # Get tables to process
277
+ tables = [table_name] if table_name else inspector.get_table_names()
278
+
279
+ for tbl in tables:
280
+ for idx in inspector.get_indexes(tbl):
281
+ indexes.append(IndexDocument(
282
+ table_name=tbl,
283
+ index_name=idx['name'],
284
+ column_names=idx['column_names'],
285
+ is_unique=idx.get('unique', False),
286
+ index_type='BTREE' # MariaDB default
287
+ ))
288
+
289
+ return indexes
290
+
291
+ except Exception as e:
292
+ logger.error(f"Error getting indexes as documents: {e}")
293
+ raise
112
294
 
113
- def get_foreign_keys(self, table_name: str) -> List[Dict[str, Any]]:
114
- """Get foreign key information for a table."""
115
- query = f"""
116
- SELECT
117
- CONSTRAINT_NAME as name,
118
- COLUMN_NAME as column_name,
119
- REFERENCED_TABLE_NAME as referenced_table,
120
- REFERENCED_COLUMN_NAME as referenced_column
121
- FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
122
- WHERE TABLE_NAME = '{table_name}'
123
- AND REFERENCED_TABLE_NAME IS NOT NULL
295
+ def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
124
296
  """
297
+ Get unique values from the database.
125
298
 
126
- return self.execute_query(query)
127
-
128
- def create_table(self, table_name: str, schema: Dict[str, Any]) -> None:
129
- """Create a new table with the given schema."""
130
- columns = []
131
- for col in schema.get('columns', []):
132
- col_def = f"{col['name']} {col['type']}"
133
- if not col.get('nullable', True):
134
- col_def += " NOT NULL"
135
- if col.get('default') is not None:
136
- col_def += f" DEFAULT {col['default']}"
137
- if col.get('primary_key'):
138
- col_def += " PRIMARY KEY"
139
- columns.append(col_def)
299
+ Returns:
300
+ Dict[str, Dict[str, List[str]]]: Dictionary where:
301
+ - outer key is table name
302
+ - inner key is column name
303
+ - value is list of unique values
304
+ """
305
+ if not self.engine:
306
+ raise RuntimeError("Not connected to database")
140
307
 
141
- query = f"CREATE TABLE {table_name} ({', '.join(columns)})"
142
- self.execute_update(query)
143
-
144
- def drop_table(self, table_name: str) -> None:
145
- """Drop a table."""
146
- query = f"DROP TABLE IF EXISTS {table_name}"
147
- self.execute_update(query)
308
+ try:
309
+ inspector = inspect(self.engine)
310
+ unique_values = {}
311
+
312
+ for table_name in inspector.get_table_names():
313
+ unique_values[table_name] = {}
314
+
315
+ for col in inspector.get_columns(table_name):
316
+ col_name = col['name']
317
+ # Only get unique values for reasonable data types
318
+ col_type = str(col['type']).upper()
319
+
320
+ if any(t in col_type for t in ['VARCHAR', 'CHAR', 'TEXT', 'INT', 'ENUM']):
321
+ try:
322
+ query = f"SELECT DISTINCT `{col_name}` FROM `{table_name}` LIMIT 100"
323
+ result = self.execute_query(query)
324
+
325
+ values = []
326
+ for row in result:
327
+ val = row.get(col_name)
328
+ if val is not None:
329
+ values.append(str(val))
330
+
331
+ if values:
332
+ unique_values[table_name][col_name] = values
333
+
334
+ except Exception as e:
335
+ logger.debug(f"Could not get unique values for {table_name}.{col_name}: {e}")
336
+ continue
337
+
338
+ return unique_values
339
+
340
+ except Exception as e:
341
+ logger.error(f"Error getting unique values: {e}")
342
+ raise
148
343
 
149
- def table_exists(self, table_name: str) -> bool:
150
- """Check if a table exists."""
151
- query = f"""
152
- SELECT COUNT(*) as count
153
- FROM INFORMATION_SCHEMA.TABLES
154
- WHERE TABLE_NAME = '{table_name}'
344
+ def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
155
345
  """
156
- result = self.execute_query(query)
157
- return result[0]['count'] > 0
158
-
159
- def get_connection_info(self) -> Dict[str, Any]:
160
- """Get connection information."""
161
- return {
162
- 'type': 'mariadb',
163
- 'connection_string': self.connection_string,
164
- 'connected': self.engine is not None
165
- }
346
+ Get example data (most frequent values) for each column in a table.
347
+
348
+ Args:
349
+ table_name (str): The name of the table.
350
+ number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.
351
+
352
+ Returns:
353
+ Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
354
+ """
355
+ if not self.engine:
356
+ raise RuntimeError("Not connected to database")
357
+
358
+ try:
359
+ inspector = inspect(self.engine)
360
+ columns = inspector.get_columns(table_name)
361
+
362
+ example_data = {}
363
+
364
+ for col in columns:
365
+ col_name = col['name']
366
+ col_type = str(col['type']).upper()
367
+
368
+ # Skip blob/binary columns
369
+ if any(t in col_type for t in ['BLOB', 'BINARY', 'IMAGE']):
370
+ example_data[col_name] = []
371
+ continue
372
+
373
+ try:
374
+ # Get most frequent values
375
+ query = f"""
376
+ SELECT `{col_name}`, COUNT(*) as freq
377
+ FROM `{table_name}`
378
+ WHERE `{col_name}` IS NOT NULL
379
+ GROUP BY `{col_name}`
380
+ ORDER BY freq DESC
381
+ LIMIT {number_of_rows}
382
+ """
383
+
384
+ result = self.execute_query(query)
385
+ values = [row[col_name] for row in result]
386
+
387
+ example_data[col_name] = values
388
+
389
+ except Exception as e:
390
+ logger.debug(f"Could not get example data for {table_name}.{col_name}: {e}")
391
+ example_data[col_name] = []
392
+
393
+ return example_data
394
+
395
+ except Exception as e:
396
+ logger.error(f"Error getting example data: {e}")
397
+ raise