thoth-dbmanager 0.5.3__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thoth_dbmanager-0.5.9/LICENSE.md +21 -0
- {thoth_dbmanager-0.5.3/thoth_dbmanager.egg-info → thoth_dbmanager-0.5.9}/PKG-INFO +2 -1
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/pyproject.toml +1 -1
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/tests/test_lsh_interactive.py +13 -3
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/tests/test_thoth_db_manager_base.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/ThothDbManager.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/__init__.py +15 -1
- thoth_dbmanager-0.5.9/thoth_dbmanager/adapters/__init__.py +53 -0
- thoth_dbmanager-0.5.9/thoth_dbmanager/adapters/mariadb.py +397 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/adapters/postgresql.py +49 -23
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/adapters/sqlite.py +14 -1
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/adapters/sqlserver.py +38 -9
- thoth_dbmanager-0.5.9/thoth_dbmanager/core/__init__.py +27 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/core/factory.py +17 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/core/interfaces.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/core/registry.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/documents.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/dynamic_imports.py +14 -0
- thoth_dbmanager-0.5.9/thoth_dbmanager/helpers/__init__.py +13 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/multi_db_generator.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/preprocess_values.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/schema.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/helpers/search.py +14 -0
- thoth_dbmanager-0.5.9/thoth_dbmanager/lsh/__init__.py +35 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/core.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/factory.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/manager.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/lsh/storage.py +14 -0
- thoth_dbmanager-0.5.9/thoth_dbmanager/plugins/__init__.py +56 -0
- thoth_dbmanager-0.5.9/thoth_dbmanager/plugins/mariadb.py +226 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/plugins/postgresql.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/plugins/sqlite.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager/plugins/sqlserver.py +14 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9/thoth_dbmanager.egg-info}/PKG-INFO +2 -1
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/SOURCES.txt +1 -0
- thoth_dbmanager-0.5.3/thoth_dbmanager/adapters/__init__.py +0 -15
- thoth_dbmanager-0.5.3/thoth_dbmanager/adapters/mariadb.py +0 -165
- thoth_dbmanager-0.5.3/thoth_dbmanager/core/__init__.py +0 -13
- thoth_dbmanager-0.5.3/thoth_dbmanager/helpers/__init__.py +0 -0
- thoth_dbmanager-0.5.3/thoth_dbmanager/lsh/__init__.py +0 -21
- thoth_dbmanager-0.5.3/thoth_dbmanager/plugins/__init__.py +0 -17
- thoth_dbmanager-0.5.3/thoth_dbmanager/plugins/mariadb.py +0 -436
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/LICENSE +0 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/MANIFEST.in +0 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/README.md +0 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/setup.cfg +0 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/dependency_links.txt +0 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/requires.txt +0 -0
- {thoth_dbmanager-0.5.3 → thoth_dbmanager-0.5.9}/thoth_dbmanager.egg-info/top_level.txt +0 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Marco Pancotti
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: thoth_dbmanager
|
3
|
-
Version: 0.5.3
|
3
|
+
Version: 0.5.9
|
4
4
|
Summary: A Python library for managing SQL databases with support for multiple database types, LSH-based similarity search, and a modern plugin architecture.
|
5
5
|
Author-email: Marco Pancotti <mp@tylconsulting.it>
|
6
6
|
Project-URL: Homepage, https://github.com/mptyl/thoth_dbmanager
|
@@ -22,6 +22,7 @@ Classifier: Development Status :: 4 - Beta
|
|
22
22
|
Requires-Python: >=3.9
|
23
23
|
Description-Content-Type: text/markdown
|
24
24
|
License-File: LICENSE
|
25
|
+
License-File: LICENSE.md
|
25
26
|
Requires-Dist: datasketch>=1.5.0
|
26
27
|
Requires-Dist: tqdm>=4.60.0
|
27
28
|
Requires-Dist: SQLAlchemy>=1.4.0
|
@@ -1,6 +1,16 @@
|
|
1
|
-
# Copyright
|
2
|
-
#
|
3
|
-
#
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
4
14
|
|
5
15
|
"""
|
6
16
|
Interactive LSH search test utility.
|
@@ -1,3 +1,17 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
import unittest
|
2
16
|
from abc import ABC
|
3
17
|
from unittest.mock import MagicMock, patch
|
@@ -1,3 +1,17 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
import logging
|
2
16
|
from abc import ABC, abstractmethod
|
3
17
|
from pathlib import Path
|
@@ -1,3 +1,17 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
1
15
|
"""
|
2
16
|
Thoth Database Manager - A unified interface for multiple database systems.
|
3
17
|
|
@@ -72,4 +86,4 @@ __all__ = [
|
|
72
86
|
"DatabaseImportError",
|
73
87
|
]
|
74
88
|
|
75
|
-
__version__ = "0.5.
|
89
|
+
__version__ = "0.5.7"
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Database adapters for Thoth SQL Database Manager.
|
17
|
+
"""
|
18
|
+
|
19
|
+
import logging
|
20
|
+
|
21
|
+
logger = logging.getLogger(__name__)
|
22
|
+
|
23
|
+
# Always available adapter (SQLite is built into Python)
|
24
|
+
from .sqlite import SQLiteAdapter
|
25
|
+
|
26
|
+
__all__ = [
|
27
|
+
"SQLiteAdapter",
|
28
|
+
]
|
29
|
+
|
30
|
+
# Optional adapters - only import if dependencies are available
|
31
|
+
try:
|
32
|
+
import psycopg2
|
33
|
+
from .postgresql import PostgreSQLAdapter
|
34
|
+
__all__.append("PostgreSQLAdapter")
|
35
|
+
except ImportError:
|
36
|
+
logger.debug("psycopg2 not installed, PostgreSQLAdapter not available")
|
37
|
+
PostgreSQLAdapter = None
|
38
|
+
|
39
|
+
try:
|
40
|
+
import mariadb
|
41
|
+
from .mariadb import MariaDBAdapter
|
42
|
+
__all__.append("MariaDBAdapter")
|
43
|
+
except ImportError:
|
44
|
+
logger.debug("MariaDB connector not installed, MariaDBAdapter not available")
|
45
|
+
MariaDBAdapter = None
|
46
|
+
|
47
|
+
try:
|
48
|
+
import pyodbc
|
49
|
+
from .sqlserver import SQLServerAdapter
|
50
|
+
__all__.append("SQLServerAdapter")
|
51
|
+
except ImportError:
|
52
|
+
logger.debug("pyodbc not installed, SQLServerAdapter not available")
|
53
|
+
SQLServerAdapter = None
|
@@ -0,0 +1,397 @@
|
|
1
|
+
# Copyright 2025 Marco Pancotti
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
MariaDB adapter implementation.
|
17
|
+
"""
|
18
|
+
import logging
|
19
|
+
from typing import Any, Dict, List, Optional, Union
|
20
|
+
import mariadb
|
21
|
+
from sqlalchemy import create_engine, text, inspect
|
22
|
+
from sqlalchemy.exc import SQLAlchemyError
|
23
|
+
|
24
|
+
from ..core.interfaces import DbAdapter
|
25
|
+
from ..documents import (
|
26
|
+
TableDocument,
|
27
|
+
ColumnDocument,
|
28
|
+
SchemaDocument,
|
29
|
+
ForeignKeyDocument,
|
30
|
+
IndexDocument
|
31
|
+
)
|
32
|
+
|
33
|
+
logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
class MariaDBAdapter(DbAdapter):
|
37
|
+
"""
|
38
|
+
MariaDB database adapter implementation.
|
39
|
+
"""
|
40
|
+
|
41
|
+
def __init__(self, connection_params: Dict[str, Any]):
|
42
|
+
super().__init__(connection_params)
|
43
|
+
self.engine = None
|
44
|
+
self.raw_connection = None
|
45
|
+
self.host = connection_params.get('host', 'localhost')
|
46
|
+
self.port = connection_params.get('port', 3307)
|
47
|
+
self.database = connection_params.get('database')
|
48
|
+
self.user = connection_params.get('user')
|
49
|
+
self.password = connection_params.get('password')
|
50
|
+
|
51
|
+
def connect(self) -> None:
|
52
|
+
"""Establish MariaDB connection"""
|
53
|
+
try:
|
54
|
+
# Create SQLAlchemy engine
|
55
|
+
connection_string = self._build_connection_string()
|
56
|
+
self.engine = create_engine(connection_string, echo=False)
|
57
|
+
|
58
|
+
# Test connection
|
59
|
+
with self.engine.connect() as conn:
|
60
|
+
conn.execute(text("SELECT 1"))
|
61
|
+
|
62
|
+
# Also create raw mariadb connection for specific operations
|
63
|
+
self.raw_connection = mariadb.connect(
|
64
|
+
host=self.host,
|
65
|
+
port=self.port,
|
66
|
+
database=self.database,
|
67
|
+
user=self.user,
|
68
|
+
password=self.password
|
69
|
+
)
|
70
|
+
|
71
|
+
self._initialized = True
|
72
|
+
logger.info("MariaDB connection established successfully")
|
73
|
+
|
74
|
+
except Exception as e:
|
75
|
+
logger.error(f"Failed to connect to MariaDB: {e}")
|
76
|
+
raise
|
77
|
+
|
78
|
+
def disconnect(self) -> None:
|
79
|
+
"""Close MariaDB connection"""
|
80
|
+
try:
|
81
|
+
if self.engine:
|
82
|
+
self.engine.dispose()
|
83
|
+
self.engine = None
|
84
|
+
|
85
|
+
if self.raw_connection:
|
86
|
+
self.raw_connection.close()
|
87
|
+
self.raw_connection = None
|
88
|
+
|
89
|
+
self._initialized = False
|
90
|
+
logger.info("MariaDB connection closed")
|
91
|
+
|
92
|
+
except Exception as e:
|
93
|
+
logger.error(f"Error closing MariaDB connection: {e}")
|
94
|
+
|
95
|
+
def _build_connection_string(self) -> str:
|
96
|
+
"""Build SQLAlchemy connection string for MariaDB"""
|
97
|
+
if not all([self.database, self.user, self.password]):
|
98
|
+
raise ValueError("Missing required connection parameters: database, user, password")
|
99
|
+
|
100
|
+
# MariaDB uses mysql+pymysql or mariadb+mariadbconnector dialect
|
101
|
+
return f"mariadb+mariadbconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
|
102
|
+
|
103
|
+
def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
|
104
|
+
"""Execute SQL query"""
|
105
|
+
if not self.engine:
|
106
|
+
raise RuntimeError("Not connected to database")
|
107
|
+
|
108
|
+
try:
|
109
|
+
with self.engine.connect() as conn:
|
110
|
+
# MariaDB doesn't have direct query timeout in the same way
|
111
|
+
# but we can set connection timeout
|
112
|
+
conn.execute(text(f"SET SESSION max_statement_time = {timeout}"))
|
113
|
+
|
114
|
+
# Execute query
|
115
|
+
if params:
|
116
|
+
result = conn.execute(text(query), params)
|
117
|
+
else:
|
118
|
+
result = conn.execute(text(query))
|
119
|
+
|
120
|
+
# Handle different fetch modes
|
121
|
+
if query.strip().upper().startswith(('SELECT', 'WITH', 'SHOW', 'DESCRIBE')):
|
122
|
+
if fetch == "all":
|
123
|
+
return [dict(row._mapping) for row in result]
|
124
|
+
elif fetch == "one":
|
125
|
+
row = result.first()
|
126
|
+
return dict(row._mapping) if row else None
|
127
|
+
elif isinstance(fetch, int):
|
128
|
+
rows = result.fetchmany(fetch)
|
129
|
+
return [dict(row._mapping) for row in rows]
|
130
|
+
else:
|
131
|
+
# For INSERT, UPDATE, DELETE
|
132
|
+
conn.commit()
|
133
|
+
return result.rowcount
|
134
|
+
|
135
|
+
except SQLAlchemyError as e:
|
136
|
+
logger.error(f"MariaDB query execution failed: {e}")
|
137
|
+
raise
|
138
|
+
|
139
|
+
def get_tables_as_documents(self) -> List[TableDocument]:
|
140
|
+
"""Return tables as document objects"""
|
141
|
+
if not self.engine:
|
142
|
+
raise RuntimeError("Not connected to database")
|
143
|
+
|
144
|
+
try:
|
145
|
+
inspector = inspect(self.engine)
|
146
|
+
tables = []
|
147
|
+
|
148
|
+
for table_name in inspector.get_table_names():
|
149
|
+
# Get row count
|
150
|
+
count_result = self.execute_query(f"SELECT COUNT(*) as count FROM {table_name}", fetch="one")
|
151
|
+
row_count = count_result.get('count', 0) if count_result else 0
|
152
|
+
|
153
|
+
# Get column count
|
154
|
+
columns = inspector.get_columns(table_name)
|
155
|
+
|
156
|
+
# Get table comment (if available)
|
157
|
+
table_comment = ""
|
158
|
+
try:
|
159
|
+
comment_result = self.execute_query(
|
160
|
+
f"SELECT table_comment FROM information_schema.tables WHERE table_name = '{table_name}'",
|
161
|
+
fetch="one"
|
162
|
+
)
|
163
|
+
table_comment = comment_result.get('table_comment', '') if comment_result else ''
|
164
|
+
except:
|
165
|
+
pass
|
166
|
+
|
167
|
+
tables.append(TableDocument(
|
168
|
+
table_name=table_name,
|
169
|
+
table_type="TABLE",
|
170
|
+
row_count=row_count,
|
171
|
+
column_count=len(columns),
|
172
|
+
description=table_comment
|
173
|
+
))
|
174
|
+
|
175
|
+
return tables
|
176
|
+
|
177
|
+
except Exception as e:
|
178
|
+
logger.error(f"Error getting tables as documents: {e}")
|
179
|
+
raise
|
180
|
+
|
181
|
+
def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
|
182
|
+
"""Return columns as document objects"""
|
183
|
+
if not self.engine:
|
184
|
+
raise RuntimeError("Not connected to database")
|
185
|
+
|
186
|
+
try:
|
187
|
+
inspector = inspect(self.engine)
|
188
|
+
columns = []
|
189
|
+
|
190
|
+
for col in inspector.get_columns(table_name):
|
191
|
+
columns.append(ColumnDocument(
|
192
|
+
table_name=table_name,
|
193
|
+
column_name=col['name'],
|
194
|
+
data_type=str(col['type']),
|
195
|
+
is_nullable=col.get('nullable', True),
|
196
|
+
column_default=col.get('default'),
|
197
|
+
is_pk=col.get('primary_key', False),
|
198
|
+
column_comment=col.get('comment', '')
|
199
|
+
))
|
200
|
+
|
201
|
+
# Mark primary keys
|
202
|
+
pk_constraint = inspector.get_pk_constraint(table_name)
|
203
|
+
if pk_constraint and pk_constraint.get('constrained_columns'):
|
204
|
+
pk_columns = pk_constraint['constrained_columns']
|
205
|
+
for col in columns:
|
206
|
+
if col.column_name in pk_columns:
|
207
|
+
col.is_pk = True
|
208
|
+
|
209
|
+
return columns
|
210
|
+
|
211
|
+
except Exception as e:
|
212
|
+
logger.error(f"Error getting columns as documents: {e}")
|
213
|
+
raise
|
214
|
+
|
215
|
+
def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
|
216
|
+
"""Return foreign keys as document objects"""
|
217
|
+
if not self.engine:
|
218
|
+
raise RuntimeError("Not connected to database")
|
219
|
+
|
220
|
+
try:
|
221
|
+
inspector = inspect(self.engine)
|
222
|
+
foreign_keys = []
|
223
|
+
|
224
|
+
for table_name in inspector.get_table_names():
|
225
|
+
for fk in inspector.get_foreign_keys(table_name):
|
226
|
+
# Each foreign key can have multiple column pairs
|
227
|
+
for i, const_col in enumerate(fk['constrained_columns']):
|
228
|
+
foreign_keys.append(ForeignKeyDocument(
|
229
|
+
constraint_name=fk['name'],
|
230
|
+
table_name=table_name,
|
231
|
+
column_name=const_col,
|
232
|
+
foreign_table_name=fk['referred_table'],
|
233
|
+
foreign_column_name=fk['referred_columns'][i] if i < len(fk['referred_columns']) else None
|
234
|
+
))
|
235
|
+
|
236
|
+
return foreign_keys
|
237
|
+
|
238
|
+
except Exception as e:
|
239
|
+
logger.error(f"Error getting foreign keys as documents: {e}")
|
240
|
+
raise
|
241
|
+
|
242
|
+
def get_schemas_as_documents(self) -> List[SchemaDocument]:
|
243
|
+
"""Return schemas as document objects"""
|
244
|
+
# MariaDB uses database as schema concept
|
245
|
+
if not self.engine:
|
246
|
+
raise RuntimeError("Not connected to database")
|
247
|
+
|
248
|
+
try:
|
249
|
+
# Get current database as schema
|
250
|
+
result = self.execute_query("SELECT DATABASE() as db_name", fetch="one")
|
251
|
+
current_db = result.get('db_name') if result else self.database
|
252
|
+
|
253
|
+
# Get table count for current database
|
254
|
+
tables = self.get_tables_as_documents()
|
255
|
+
|
256
|
+
return [SchemaDocument(
|
257
|
+
catalog_name=current_db,
|
258
|
+
schema_name=current_db,
|
259
|
+
schema_owner=self.user,
|
260
|
+
table_count=len(tables)
|
261
|
+
)]
|
262
|
+
|
263
|
+
except Exception as e:
|
264
|
+
logger.error(f"Error getting schemas as documents: {e}")
|
265
|
+
raise
|
266
|
+
|
267
|
+
def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
|
268
|
+
"""Return indexes as document objects"""
|
269
|
+
if not self.engine:
|
270
|
+
raise RuntimeError("Not connected to database")
|
271
|
+
|
272
|
+
try:
|
273
|
+
inspector = inspect(self.engine)
|
274
|
+
indexes = []
|
275
|
+
|
276
|
+
# Get tables to process
|
277
|
+
tables = [table_name] if table_name else inspector.get_table_names()
|
278
|
+
|
279
|
+
for tbl in tables:
|
280
|
+
for idx in inspector.get_indexes(tbl):
|
281
|
+
indexes.append(IndexDocument(
|
282
|
+
table_name=tbl,
|
283
|
+
index_name=idx['name'],
|
284
|
+
column_names=idx['column_names'],
|
285
|
+
is_unique=idx.get('unique', False),
|
286
|
+
index_type='BTREE' # MariaDB default
|
287
|
+
))
|
288
|
+
|
289
|
+
return indexes
|
290
|
+
|
291
|
+
except Exception as e:
|
292
|
+
logger.error(f"Error getting indexes as documents: {e}")
|
293
|
+
raise
|
294
|
+
|
295
|
+
def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
|
296
|
+
"""
|
297
|
+
Get unique values from the database.
|
298
|
+
|
299
|
+
Returns:
|
300
|
+
Dict[str, Dict[str, List[str]]]: Dictionary where:
|
301
|
+
- outer key is table name
|
302
|
+
- inner key is column name
|
303
|
+
- value is list of unique values
|
304
|
+
"""
|
305
|
+
if not self.engine:
|
306
|
+
raise RuntimeError("Not connected to database")
|
307
|
+
|
308
|
+
try:
|
309
|
+
inspector = inspect(self.engine)
|
310
|
+
unique_values = {}
|
311
|
+
|
312
|
+
for table_name in inspector.get_table_names():
|
313
|
+
unique_values[table_name] = {}
|
314
|
+
|
315
|
+
for col in inspector.get_columns(table_name):
|
316
|
+
col_name = col['name']
|
317
|
+
# Only get unique values for reasonable data types
|
318
|
+
col_type = str(col['type']).upper()
|
319
|
+
|
320
|
+
if any(t in col_type for t in ['VARCHAR', 'CHAR', 'TEXT', 'INT', 'ENUM']):
|
321
|
+
try:
|
322
|
+
query = f"SELECT DISTINCT `{col_name}` FROM `{table_name}` LIMIT 100"
|
323
|
+
result = self.execute_query(query)
|
324
|
+
|
325
|
+
values = []
|
326
|
+
for row in result:
|
327
|
+
val = row.get(col_name)
|
328
|
+
if val is not None:
|
329
|
+
values.append(str(val))
|
330
|
+
|
331
|
+
if values:
|
332
|
+
unique_values[table_name][col_name] = values
|
333
|
+
|
334
|
+
except Exception as e:
|
335
|
+
logger.debug(f"Could not get unique values for {table_name}.{col_name}: {e}")
|
336
|
+
continue
|
337
|
+
|
338
|
+
return unique_values
|
339
|
+
|
340
|
+
except Exception as e:
|
341
|
+
logger.error(f"Error getting unique values: {e}")
|
342
|
+
raise
|
343
|
+
|
344
|
+
def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
|
345
|
+
"""
|
346
|
+
Get example data (most frequent values) for each column in a table.
|
347
|
+
|
348
|
+
Args:
|
349
|
+
table_name (str): The name of the table.
|
350
|
+
number_of_rows (int, optional): Maximum number of example values to return per column. Defaults to 30.
|
351
|
+
|
352
|
+
Returns:
|
353
|
+
Dict[str, List[Any]]: A dictionary mapping column names to lists of example values.
|
354
|
+
"""
|
355
|
+
if not self.engine:
|
356
|
+
raise RuntimeError("Not connected to database")
|
357
|
+
|
358
|
+
try:
|
359
|
+
inspector = inspect(self.engine)
|
360
|
+
columns = inspector.get_columns(table_name)
|
361
|
+
|
362
|
+
example_data = {}
|
363
|
+
|
364
|
+
for col in columns:
|
365
|
+
col_name = col['name']
|
366
|
+
col_type = str(col['type']).upper()
|
367
|
+
|
368
|
+
# Skip blob/binary columns
|
369
|
+
if any(t in col_type for t in ['BLOB', 'BINARY', 'IMAGE']):
|
370
|
+
example_data[col_name] = []
|
371
|
+
continue
|
372
|
+
|
373
|
+
try:
|
374
|
+
# Get most frequent values
|
375
|
+
query = f"""
|
376
|
+
SELECT `{col_name}`, COUNT(*) as freq
|
377
|
+
FROM `{table_name}`
|
378
|
+
WHERE `{col_name}` IS NOT NULL
|
379
|
+
GROUP BY `{col_name}`
|
380
|
+
ORDER BY freq DESC
|
381
|
+
LIMIT {number_of_rows}
|
382
|
+
"""
|
383
|
+
|
384
|
+
result = self.execute_query(query)
|
385
|
+
values = [row[col_name] for row in result]
|
386
|
+
|
387
|
+
example_data[col_name] = values
|
388
|
+
|
389
|
+
except Exception as e:
|
390
|
+
logger.debug(f"Could not get example data for {table_name}.{col_name}: {e}")
|
391
|
+
example_data[col_name] = []
|
392
|
+
|
393
|
+
return example_data
|
394
|
+
|
395
|
+
except Exception as e:
|
396
|
+
logger.error(f"Error getting example data: {e}")
|
397
|
+
raise
|