thoth-dbmanager 0.5.3__tar.gz → 0.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. thoth_dbmanager-0.5.9/LICENSE.md +21 -0
  2. {thoth_dbmanager-0.5.3/thoth_dbmanager.egg-info → thoth_dbmanager-0.5.9}/PKG-INFO +2 -1
  3. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/pyproject.toml +1 -1
  4. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/tests/test_lsh_interactive.py +13 -3
  5. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/tests/test_thoth_db_manager_base.py +14 -0
  6. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/ThothDbManager.py +14 -0
  7. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/__init__.py +15 -1
  8. thoth_dbmanager-0.5.9/thoth_dbmanager/adapters/__init__.py +53 -0
  9. thoth_dbmanager-0.5.9/thoth_dbmanager/adapters/mariadb.py +397 -0
  10. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/adapters/postgresql.py +49 -23
  11. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/adapters/sqlite.py +14 -1
  12. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/adapters/sqlserver.py +38 -9
  13. thoth_dbmanager-0.5.9/thoth_dbmanager/core/__init__.py +27 -0
  14. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/core/factory.py +17 -0
  15. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/core/interfaces.py +14 -0
  16. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/core/registry.py +14 -0
  17. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/documents.py +14 -0
  18. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/dynamic_imports.py +14 -0
  19. thoth_dbmanager-0.5.9/thoth_dbmanager/helpers/__init__.py +13 -0
  20. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/multi_db_generator.py +14 -0
  21. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/preprocess_values.py +14 -0
  22. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/schema.py +14 -0
  23. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/search.py +14 -0
  24. thoth_dbmanager-0.5.9/thoth_dbmanager/lsh/__init__.py +35 -0
  25. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/core.py +14 -0
  26. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/factory.py +14 -0
  27. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/manager.py +14 -0
  28. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/storage.py +14 -0
  29. thoth_dbmanager-0.5.9/thoth_dbmanager/plugins/__init__.py +56 -0
  30. thoth_dbmanager-0.5.9/thoth_dbmanager/plugins/mariadb.py +226 -0
  31. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/plugins/postgresql.py +14 -0
  32. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/plugins/sqlite.py +14 -0
  33. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/plugins/sqlserver.py +14 -0
  34. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9/thoth_dbmanager.egg-info}/PKG-INFO +2 -1
  35. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/SOURCES.txt +1 -0
  36. thoth_dbmanager-0.5.3/thoth_dbmanager/adapters/__init__.py +0 -15
  37. thoth_dbmanager-0.5.3/thoth_dbmanager/adapters/mariadb.py +0 -165
  38. thoth_dbmanager-0.5.3/thoth_dbmanager/core/__init__.py +0 -13
  39. thoth_dbmanager-0.5.3/thoth_dbmanager/helpers/__init__.py +0 -0
  40. thoth_dbmanager-0.5.3/thoth_dbmanager/lsh/__init__.py +0 -21
  41. thoth_dbmanager-0.5.3/thoth_dbmanager/plugins/__init__.py +0 -17
  42. thoth_dbmanager-0.5.3/thoth_dbmanager/plugins/mariadb.py +0 -436
  43. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/LICENSE +0 -0
  44. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/MANIFEST.in +0 -0
  45. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/README.md +0 -0
  46. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/setup.cfg +0 -0
  47. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/dependency_links.txt +0 -0
  48. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/requires.txt +0 -0
  49. {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/top_level.txt +0 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Marco Pancotti
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thoth_dbmanager
3
- Version: 0.5.3
3
+ Version: 0.5.9
4
4
  Summary: A Python library for managing SQL databases with support for multiple database types, LSH-based similarity search, and a modern plugin architecture.
5
5
  Author-email: Marco Pancotti <mp@tylconsulting.it>
6
6
  Project-URL: Homepage, https://github.com/mptyl/thoth_dbmanager
@@ -22,6 +22,7 @@ Classifier: Development Status :: 4 - Beta
22
22
  Requires-Python: >=3.9
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
+ License-File: LICENSE.md
25
26
  Requires-Dist: datasketch>=1.5.0
26
27
  Requires-Dist: tqdm>=4.60.0
27
28
  Requires-Dist: SQLAlchemy>=1.4.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "thoth_dbmanager"
7
- version = "0.5.3"
7
+ version = "0.5.9"
8
8
  authors = [
9
9
  { name="Marco Pancotti", email="mp@tylconsulting.it" },
10
10
  ]
@@ -1,6 +1,16 @@
1
- # Copyright (c) 2025 Marco Pancotti
2
- # This file is part of Thoth and is released under the MIT License.
3
- # See the LICENSE.md file in the project root for full license information.
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
4
14
 
5
15
  """
6
16
  Interactive LSH search test utility.
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import unittest
2
16
  from abc import ABC
3
17
  from unittest.mock import MagicMock, patch
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import logging
2
16
  from abc import ABC, abstractmethod
3
17
  from pathlib import Path
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  """
2
16
  Thoth Database Manager - A unified interface for multiple database systems.
3
17
 
@@ -72,4 +86,4 @@ __all__ = [
72
86
  "DatabaseImportError",
73
87
  ]
74
88
 
75
- __version__ = "0.5.0"
89
# Keep in sync with the `version` field in pyproject.toml (0.5.9 for this release).
__version__ = "0.5.9"
@@ -0,0 +1,53 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Database adapters for Thoth SQL Database Manager.
17
+ """
18
+
19
import logging

logger = logging.getLogger(__name__)

# SQLite is built into Python, so this adapter is imported unconditionally.
from .sqlite import SQLiteAdapter

__all__ = ["SQLiteAdapter"]

# Every other adapter needs an optional driver package. The driver is imported
# first purely as an availability probe; when that (or the adapter module
# import) raises ImportError, the adapter name is bound to None and is left out
# of __all__.
try:
    import psycopg2  # noqa: F401
    from .postgresql import PostgreSQLAdapter
except ImportError:
    logger.debug("psycopg2 not installed, PostgreSQLAdapter not available")
    PostgreSQLAdapter = None
else:
    __all__.append("PostgreSQLAdapter")

try:
    import mariadb  # noqa: F401
    from .mariadb import MariaDBAdapter
except ImportError:
    logger.debug("MariaDB connector not installed, MariaDBAdapter not available")
    MariaDBAdapter = None
else:
    __all__.append("MariaDBAdapter")

try:
    import pyodbc  # noqa: F401
    from .sqlserver import SQLServerAdapter
except ImportError:
    logger.debug("pyodbc not installed, SQLServerAdapter not available")
    SQLServerAdapter = None
else:
    __all__.append("SQLServerAdapter")
@@ -0,0 +1,397 @@
1
+ # Copyright 2025 Marco Pancotti
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ MariaDB adapter implementation.
17
+ """
18
+ import logging
19
+ from typing import Any, Dict, List, Optional, Union
20
+ import mariadb
21
+ from sqlalchemy import create_engine, text, inspect
22
+ from sqlalchemy.exc import SQLAlchemyError
23
+
24
+ from ..core.interfaces import DbAdapter
25
+ from ..documents import (
26
+ TableDocument,
27
+ ColumnDocument,
28
+ SchemaDocument,
29
+ ForeignKeyDocument,
30
+ IndexDocument
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
class MariaDBAdapter(DbAdapter):
    """
    MariaDB database adapter implementation.

    Queries and schema inspection go through a SQLAlchemy engine; ``connect()``
    additionally opens a raw ``mariadb`` connector connection that is kept on
    ``self.raw_connection``.
    """

    def __init__(self, connection_params: Dict[str, Any]):
        """
        Store the connection settings; nothing is opened until ``connect()``.

        Args:
            connection_params: Mapping read for the keys ``host`` (default
                ``'localhost'``), ``port`` (default ``3307``), ``database``,
                ``user`` and ``password``.
        """
        super().__init__(connection_params)
        self.engine = None
        self.raw_connection = None
        self.host = connection_params.get('host', 'localhost')
        # NOTE(review): MariaDB's standard port is 3306; the 3307 default is
        # kept unchanged so existing callers are not affected - confirm intent.
        self.port = connection_params.get('port', 3307)
        self.database = connection_params.get('database')
        self.user = connection_params.get('user')
        self.password = connection_params.get('password')

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Return *identifier* wrapped in backticks, with embedded backticks doubled."""
        return "`" + str(identifier).replace("`", "``") + "`"

    def _release_resources(self) -> bool:
        """
        Dispose the engine and close the raw connection, each independently.

        Failures are logged instead of raised so that one failing close does
        not prevent the other resource from being released.

        Returns:
            True when everything closed without error, False otherwise.
        """
        clean = True

        if self.engine is not None:
            try:
                self.engine.dispose()
            except Exception as e:
                clean = False
                logger.error(f"Error closing MariaDB connection: {e}")
            finally:
                self.engine = None

        if self.raw_connection is not None:
            try:
                self.raw_connection.close()
            except Exception as e:
                clean = False
                logger.error(f"Error closing MariaDB connection: {e}")
            finally:
                self.raw_connection = None

        self._initialized = False
        return clean

    def connect(self) -> None:
        """
        Establish MariaDB connection.

        Creates the SQLAlchemy engine, verifies it with ``SELECT 1`` and then
        opens the raw ``mariadb`` connection. If any step fails, whatever was
        already opened is released before the exception is re-raised.

        Raises:
            ValueError: If database, user or password is missing.
            Exception: Any error raised while creating or testing the connections.
        """
        try:
            # Create SQLAlchemy engine
            connection_string = self._build_connection_string()
            self.engine = create_engine(connection_string, echo=False)

            # Test connection
            with self.engine.connect() as conn:
                conn.execute(text("SELECT 1"))

            # Also create raw mariadb connection for specific operations.
            # The port is coerced to int so a port supplied as a string
            # (e.g. read from configuration) is passed on as a number.
            self.raw_connection = mariadb.connect(
                host=self.host,
                port=int(self.port),
                database=self.database,
                user=self.user,
                password=self.password
            )

            self._initialized = True
            logger.info("MariaDB connection established successfully")

        except Exception as e:
            logger.error(f"Failed to connect to MariaDB: {e}")
            # Do not leave a half-open adapter (e.g. engine created but raw
            # connection failed) behind.
            self._release_resources()
            raise

    def disconnect(self) -> None:
        """Close MariaDB connection; close errors are logged, never raised."""
        if self._release_resources():
            logger.info("MariaDB connection closed")

    def _build_connection_string(self) -> str:
        """
        Build SQLAlchemy connection string for MariaDB.

        User and password are URL-escaped so that characters such as ``@``,
        ``:`` or ``/`` in the credentials do not corrupt the URL.

        Raises:
            ValueError: If database, user or password is missing or empty.
        """
        from urllib.parse import quote_plus

        if not all([self.database, self.user, self.password]):
            raise ValueError("Missing required connection parameters: database, user, password")

        user = quote_plus(str(self.user))
        password = quote_plus(str(self.password))

        # Uses the mariadb+mariadbconnector dialect
        return f"mariadb+mariadbconnector://{user}:{password}@{self.host}:{self.port}/{self.database}"

    def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
        """
        Execute SQL query.

        Args:
            query: SQL text; it is wrapped in ``text()`` and bound with *params*.
            params: Optional bind parameters for the query.
            fetch: ``"all"``, ``"one"`` or an integer passed to ``fetchmany``.
            timeout: Value assigned to the session's ``max_statement_time``
                before the query runs.

        Returns:
            For statements starting with SELECT, WITH, SHOW or DESCRIBE: a list
            of row dicts (``"all"`` or an integer), one row dict or ``None``
            (``"one"``), or ``None`` for any other *fetch* value. For all other
            statements the transaction is committed and ``rowcount`` is returned.

        Raises:
            RuntimeError: If the adapter is not connected.
            TypeError, ValueError: If *timeout* cannot be converted to a number.
            SQLAlchemyError: If statement execution fails.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        # The timeout is interpolated into SQL below, so make sure it is a
        # number first; integers are left untouched.
        timeout_value = timeout if isinstance(timeout, int) else float(timeout)

        try:
            with self.engine.connect() as conn:
                # Limit statement execution time for this session
                conn.execute(text(f"SET SESSION max_statement_time = {timeout_value}"))

                # Execute query
                if params:
                    result = conn.execute(text(query), params)
                else:
                    result = conn.execute(text(query))

                # Handle different fetch modes
                if query.strip().upper().startswith(('SELECT', 'WITH', 'SHOW', 'DESCRIBE')):
                    if fetch == "all":
                        return [dict(row._mapping) for row in result]
                    elif fetch == "one":
                        row = result.first()
                        return dict(row._mapping) if row else None
                    elif isinstance(fetch, int):
                        rows = result.fetchmany(fetch)
                        return [dict(row._mapping) for row in rows]
                    # Any other fetch value yields no rows
                    return None
                else:
                    # For INSERT, UPDATE, DELETE
                    conn.commit()
                    return result.rowcount

        except SQLAlchemyError as e:
            logger.error(f"MariaDB query execution failed: {e}")
            raise

    def get_tables_as_documents(self) -> List[TableDocument]:
        """
        Return tables as document objects.

        For every table reported by the inspector this collects the row count,
        the number of columns and, on a best-effort basis, the table comment.

        Raises:
            RuntimeError: If the adapter is not connected.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        try:
            inspector = inspect(self.engine)
            tables = []

            for table_name in inspector.get_table_names():
                # Get row count (identifier quoted so reserved words and
                # special characters in table names do not break the query)
                count_result = self.execute_query(
                    f"SELECT COUNT(*) as count FROM {self._quote_identifier(table_name)}",
                    fetch="one"
                )
                row_count = count_result.get('count', 0) if count_result else 0

                # Get column count
                columns = inspector.get_columns(table_name)

                # Get table comment (if available). The lookup is restricted to
                # the current database and binds the table name as a parameter.
                table_comment = ""
                try:
                    comment_result = self.execute_query(
                        "SELECT table_comment AS table_comment "
                        "FROM information_schema.tables "
                        "WHERE table_schema = DATABASE() AND table_name = :table_name",
                        params={"table_name": table_name},
                        fetch="one"
                    )
                    if comment_result:
                        table_comment = comment_result.get('table_comment', '') or ''
                except Exception as e:
                    # Comments are optional metadata; keep going without one.
                    logger.debug(f"Could not get comment for table {table_name}: {e}")

                tables.append(TableDocument(
                    table_name=table_name,
                    table_type="TABLE",
                    row_count=row_count,
                    column_count=len(columns),
                    description=table_comment
                ))

            return tables

        except Exception as e:
            logger.error(f"Error getting tables as documents: {e}")
            raise

    def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
        """
        Return columns as document objects.

        Args:
            table_name: Table whose columns are described.

        Raises:
            RuntimeError: If the adapter is not connected.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        try:
            inspector = inspect(self.engine)
            columns = []

            for col in inspector.get_columns(table_name):
                columns.append(ColumnDocument(
                    table_name=table_name,
                    column_name=col['name'],
                    data_type=str(col['type']),
                    is_nullable=col.get('nullable', True),
                    column_default=col.get('default'),
                    is_pk=col.get('primary_key', False),
                    column_comment=col.get('comment', '')
                ))

            # Mark primary keys using the table's primary-key constraint
            pk_constraint = inspector.get_pk_constraint(table_name)
            if pk_constraint and pk_constraint.get('constrained_columns'):
                pk_columns = pk_constraint['constrained_columns']
                for col in columns:
                    if col.column_name in pk_columns:
                        col.is_pk = True

            return columns

        except Exception as e:
            logger.error(f"Error getting columns as documents: {e}")
            raise

    def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
        """
        Return foreign keys as document objects.

        One document is produced per constrained column, so a multi-column
        foreign key yields several documents sharing one constraint name.

        Raises:
            RuntimeError: If the adapter is not connected.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        try:
            inspector = inspect(self.engine)
            foreign_keys = []

            for table_name in inspector.get_table_names():
                for fk in inspector.get_foreign_keys(table_name):
                    referred_columns = fk['referred_columns']
                    # Each foreign key can have multiple column pairs
                    for i, const_col in enumerate(fk['constrained_columns']):
                        foreign_keys.append(ForeignKeyDocument(
                            constraint_name=fk['name'],
                            table_name=table_name,
                            column_name=const_col,
                            foreign_table_name=fk['referred_table'],
                            foreign_column_name=referred_columns[i] if i < len(referred_columns) else None
                        ))

            return foreign_keys

        except Exception as e:
            logger.error(f"Error getting foreign keys as documents: {e}")
            raise

    def get_schemas_as_documents(self) -> List[SchemaDocument]:
        """
        Return schemas as document objects.

        MariaDB uses database as schema concept, so a single document for the
        current database is returned.

        Raises:
            RuntimeError: If the adapter is not connected.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        try:
            # Get current database as schema
            result = self.execute_query("SELECT DATABASE() as db_name", fetch="one")
            current_db = result.get('db_name') if result else self.database

            # Get table count for current database
            tables = self.get_tables_as_documents()

            return [SchemaDocument(
                catalog_name=current_db,
                schema_name=current_db,
                schema_owner=self.user,
                table_count=len(tables)
            )]

        except Exception as e:
            logger.error(f"Error getting schemas as documents: {e}")
            raise

    def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
        """
        Return indexes as document objects.

        Args:
            table_name: Restrict the result to this table; when omitted, all
                tables reported by the inspector are processed.

        Raises:
            RuntimeError: If the adapter is not connected.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        try:
            inspector = inspect(self.engine)
            indexes = []

            # Get tables to process
            tables = [table_name] if table_name else inspector.get_table_names()

            for tbl in tables:
                for idx in inspector.get_indexes(tbl):
                    indexes.append(IndexDocument(
                        table_name=tbl,
                        index_name=idx['name'],
                        column_names=idx['column_names'],
                        is_unique=idx.get('unique', False),
                        index_type='BTREE'  # MariaDB default
                    ))

            return indexes

        except Exception as e:
            logger.error(f"Error getting indexes as documents: {e}")
            raise

    def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
        """
        Get unique values from the database.

        At most 100 distinct values are read per column, and only for columns
        whose type name contains VARCHAR, CHAR, TEXT, INT or ENUM. Columns
        whose query fails, or that hold no non-NULL values, are omitted.

        Returns:
            Dict[str, Dict[str, List[str]]]: Dictionary where:
                - outer key is table name
                - inner key is column name
                - value is list of unique values (as strings)

        Raises:
            RuntimeError: If the adapter is not connected.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        try:
            inspector = inspect(self.engine)
            unique_values: Dict[str, Dict[str, List[str]]] = {}

            for table_name in inspector.get_table_names():
                unique_values[table_name] = {}
                quoted_table = self._quote_identifier(table_name)

                for col in inspector.get_columns(table_name):
                    col_name = col['name']
                    # Only get unique values for reasonable data types
                    col_type = str(col['type']).upper()

                    if not any(t in col_type for t in ['VARCHAR', 'CHAR', 'TEXT', 'INT', 'ENUM']):
                        continue

                    try:
                        query = f"SELECT DISTINCT {self._quote_identifier(col_name)} FROM {quoted_table} LIMIT 100"
                        result = self.execute_query(query)

                        values = [
                            str(row.get(col_name))
                            for row in result
                            if row.get(col_name) is not None
                        ]

                        if values:
                            unique_values[table_name][col_name] = values

                    except Exception as e:
                        logger.debug(f"Could not get unique values for {table_name}.{col_name}: {e}")
                        continue

            return unique_values

        except Exception as e:
            logger.error(f"Error getting unique values: {e}")
            raise

    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
        """
        Get example data (most frequent values) for each column in a table.

        Args:
            table_name (str): The name of the table.
            number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.

        Returns:
            Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
            Columns whose type name contains BLOB, BINARY or IMAGE, and columns
            whose query fails, map to an empty list.

        Raises:
            RuntimeError: If the adapter is not connected.
            TypeError, ValueError: If *number_of_rows* cannot be converted to an int.
        """
        if not self.engine:
            raise RuntimeError("Not connected to database")

        # Interpolated into the LIMIT clause below, so force it to an integer.
        row_limit = int(number_of_rows)

        try:
            inspector = inspect(self.engine)
            columns = inspector.get_columns(table_name)
            quoted_table = self._quote_identifier(table_name)

            example_data: Dict[str, List[Any]] = {}

            for col in columns:
                col_name = col['name']
                col_type = str(col['type']).upper()

                # Skip blob/binary columns
                if any(t in col_type for t in ['BLOB', 'BINARY', 'IMAGE']):
                    example_data[col_name] = []
                    continue

                try:
                    quoted_col = self._quote_identifier(col_name)
                    # Get most frequent values. Ordering by the aggregate
                    # directly avoids a result alias that could clash with a
                    # column of the same name.
                    query = f"""
                    SELECT {quoted_col}
                    FROM {quoted_table}
                    WHERE {quoted_col} IS NOT NULL
                    GROUP BY {quoted_col}
                    ORDER BY COUNT(*) DESC
                    LIMIT {row_limit}
                    """

                    result = self.execute_query(query)
                    example_data[col_name] = [row[col_name] for row in result]

                except Exception as e:
                    logger.debug(f"Could not get example data for {table_name}.{col_name}: {e}")
                    example_data[col_name] = []

            return example_data

        except Exception as e:
            logger.error(f"Error getting example data: {e}")
            raise