thoth-dbmanager 0.4.13__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- thoth_dbmanager/ThothDbManager.py +19 -227
- thoth_dbmanager/__init__.py +8 -63
- thoth_dbmanager/adapters/__init__.py +0 -6
- thoth_dbmanager/core/factory.py +0 -16
- thoth_dbmanager/dynamic_imports.py +0 -9
- thoth_dbmanager/helpers/multi_db_generator.py +1 -1
- thoth_dbmanager/plugins/__init__.py +0 -6
- thoth_dbmanager/plugins/sqlite.py +7 -0
- {thoth_dbmanager-0.4.13.dist-info → thoth_dbmanager-0.5.1.dist-info}/METADATA +228 -27
- {thoth_dbmanager-0.4.13.dist-info → thoth_dbmanager-0.5.1.dist-info}/RECORD +13 -19
- thoth_dbmanager/adapters/mysql.py +0 -165
- thoth_dbmanager/adapters/oracle.py +0 -554
- thoth_dbmanager/adapters/supabase.py +0 -249
- thoth_dbmanager/plugins/mysql.py +0 -408
- thoth_dbmanager/plugins/oracle.py +0 -150
- thoth_dbmanager/plugins/supabase.py +0 -224
- {thoth_dbmanager-0.4.13.dist-info → thoth_dbmanager-0.5.1.dist-info}/WHEEL +0 -0
- {thoth_dbmanager-0.4.13.dist-info → thoth_dbmanager-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {thoth_dbmanager-0.4.13.dist-info → thoth_dbmanager-0.5.1.dist-info}/top_level.txt +0 -0
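The practical upshot of this release: the MySQL, Oracle, and Supabase adapters and plugins are deleted outright (their full contents appear in the hunks below), and the core manager, factory, and dynamic-import machinery shrink accordingly. For consumers upgrading from 0.4.x, a defensive guard such as the following sketch may help. The set of removed backends is read off the file list above; the function itself is illustrative and not part of the package's API.

    # Hypothetical upgrade guard for thoth-dbmanager >= 0.5. The removed
    # backend names come from the deleted files listed above; everything
    # else (names, message text) is an assumption for illustration.
    REMOVED_IN_0_5 = {"mysql", "oracle", "supabase"}

    def check_backend(db_type: str) -> None:
        """Fail fast when code still requests a backend dropped in 0.5.x."""
        if db_type.lower() in REMOVED_IN_0_5:
            raise RuntimeError(
                f"thoth-dbmanager >= 0.5 no longer bundles the {db_type!r} backend; "
                "pin thoth-dbmanager==0.4.13 or migrate to a remaining backend."
            )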
thoth_dbmanager/adapters/supabase.py
DELETED
@@ -1,249 +0,0 @@
-"""
-Supabase adapter implementation.
-"""
-import logging
-from typing import Any, Dict, List, Optional, Union
-import psycopg2
-from psycopg2.extras import RealDictCursor
-from sqlalchemy import create_engine, text
-from sqlalchemy.exc import SQLAlchemyError
-from urllib.parse import urlparse, parse_qs
-
-from .postgresql import PostgreSQLAdapter
-from ..core.interfaces import DbAdapter
-from ..documents import (
-    TableDocument,
-    ColumnDocument,
-    SchemaDocument,
-    ForeignKeyDocument,
-    IndexDocument
-)
-
-logger = logging.getLogger(__name__)
-
-
-class SupabaseAdapter(PostgreSQLAdapter):
-    """
-    Supabase database adapter implementation.
-    Extends PostgreSQL adapter with Supabase-specific features.
-    """
-
-    def __init__(self, connection_params: Dict[str, Any]):
-        super().__init__(connection_params)
-        self.supabase_url = None
-        self.api_key = None
-        self.use_rest_api = False
-
-    def connect(self) -> None:
-        """Establish Supabase connection with SSL enforcement"""
-        try:
-            # Check if we should use REST API or direct database connection
-            self.use_rest_api = self.connection_params.get('use_rest_api', False)
-
-            if self.use_rest_api:
-                # REST API connection setup
-                self.supabase_url = self.connection_params.get('project_url')
-                self.api_key = self.connection_params.get('api_key')
-
-                if not self.supabase_url or not self.api_key:
-                    raise ValueError("project_url and api_key are required for REST API mode")
-
-                logger.info("Supabase REST API connection established")
-            else:
-                # Direct database connection (PostgreSQL with SSL)
-                super().connect()
-
-                # Ensure SSL is enabled for Supabase
-                if hasattr(self, 'engine') and self.engine:
-                    # Update connection string to enforce SSL
-                    connection_string = self._build_connection_string()
-                    if 'sslmode=' not in connection_string:
-                        connection_string += '?sslmode=require'
-
-                    self.engine = create_engine(connection_string, echo=False)
-
-                    # Test connection
-                    with self.engine.connect() as conn:
-                        conn.execute(text("SELECT 1"))
-
-                logger.info("Supabase database connection established with SSL")
-
-        except Exception as e:
-            logger.error(f"Failed to connect to Supabase: {e}")
-            raise
-
-    def _build_connection_string(self) -> str:
-        """Build SQLAlchemy connection string with Supabase-specific parameters"""
-        params = self.connection_params
-
-        if self.use_rest_api:
-            return params.get('project_url')
-
-        # Direct database connection
-        host = params.get('host')
-        port = params.get('port', 5432)
-        database = params.get('database')
-        user = params.get('user')
-        password = params.get('password')
-
-        if not all([host, database, user, password]):
-            raise ValueError("Missing required connection parameters: host, database, user, password")
-
-        # Ensure SSL mode for Supabase
-        ssl_mode = params.get('sslmode', 'require')
-
-        connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}?sslmode={ssl_mode}"
-
-        # Add additional SSL parameters if provided
-        if params.get('sslcert'):
-            connection_string += f"&sslcert={params['sslcert']}"
-        if params.get('sslkey'):
-            connection_string += f"&sslkey={params['sslkey']}"
-        if params.get('sslrootcert'):
-            connection_string += f"&sslrootcert={params['sslrootcert']}"
-
-        return connection_string
-
-    def _get_psycopg2_params(self) -> Dict[str, Any]:
-        """Get parameters for psycopg2 connection with SSL"""
-        params = super()._get_psycopg2_params()
-
-        # Ensure SSL is enabled for Supabase
-        params['sslmode'] = self.connection_params.get('sslmode', 'require')
-
-        # Add SSL certificates if provided
-        if self.connection_params.get('sslcert'):
-            params['sslcert'] = self.connection_params['sslcert']
-        if self.connection_params.get('sslkey'):
-            params['sslkey'] = self.connection_params['sslkey']
-        if self.connection_params.get('sslrootcert'):
-            params['sslrootcert'] = self.connection_params['sslrootcert']
-
-        return params
-
-    def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
-        """Execute SQL queries with Supabase-specific optimizations"""
-        if self.use_rest_api:
-            return self._execute_rest_query(query, params, fetch, timeout)
-        else:
-            return super().execute_query(query, params, fetch, timeout)
-
-    def _execute_rest_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
-        """Execute query using Supabase REST API"""
-        try:
-            from supabase import create_client
-            from postgrest.exceptions import APIError
-
-            # Create Supabase client
-            supabase = create_client(self.supabase_url, self.api_key)
-
-            # For REST API, we need to convert SQL to Postgrest queries
-            # This is a simplified implementation - in practice, you'd need a SQL parser
-            if query.strip().upper().startswith('SELECT'):
-                # Extract table name and conditions from query
-                table_name = self._extract_table_name(query)
-
-                # Build Postgrest query
-                result = supabase.table(table_name).select('*').execute()
-
-                if fetch == "all":
-                    return result.data
-                elif fetch == "one":
-                    return result.data[0] if result.data else None
-                elif isinstance(fetch, int):
-                    return result.data[:fetch]
-                else:
-                    return result.data
-            else:
-                # For non-SELECT queries, use RPC
-                result = supabase.rpc('execute_sql', {'sql': query}).execute()
-                return result.data
-
-        except ImportError:
-            raise RuntimeError("supabase-py package is required for REST API mode")
-        except APIError as e:
-            logger.error(f"Supabase REST API error: {e}")
-            raise
-
-    def _extract_table_name(self, query: str) -> str:
-        """Extract table name from SQL query (simplified)"""
-        # This is a basic implementation - in practice, you'd use a proper SQL parser
-        query = query.upper()
-        from_index = query.find('FROM')
-        if from_index != -1:
-            after_from = query[from_index + 4:].strip()
-            # Find first space or end of string
-            space_index = after_from.find(' ')
-            if space_index != -1:
-                return after_from[:space_index].lower()
-            else:
-                return after_from.lower()
-        return "unknown"
-
-    def get_tables_as_documents(self) -> List[TableDocument]:
-        """Get tables with Supabase schema considerations"""
-        tables = super().get_tables_as_documents()
-
-        # Filter out Supabase system schemas
-        filtered_tables = []
-        for table in tables:
-            if table.schema_name not in ['auth', 'storage', 'realtime', 'supabase_functions']:
-                filtered_tables.append(table)
-
-        return filtered_tables
-
-    def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
-        """Get columns with Supabase-specific handling"""
-        columns = super().get_columns_as_documents(table_name)
-
-        # Add Supabase-specific metadata
-        for column in columns:
-            if column.column_name in ['created_at', 'updated_at']:
-                column.comment = f"{column.comment} (Supabase auto-timestamp)"
-            elif column.column_name == 'id':
-                column.comment = f"{column.comment} (Supabase auto-increment)"
-
-        return columns
-
-    def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
-        """Get unique values with Supabase schema filtering"""
-        result = super().get_unique_values()
-
-        # Filter out Supabase system tables
-        filtered_result = {}
-        for table_name, columns in result.items():
-            if not table_name.startswith('auth_') and not table_name.startswith('storage_'):
-                filtered_result[table_name] = columns
-
-        return filtered_result
-
-    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
-        """Get example data with Supabase-specific handling"""
-        if self.use_rest_api:
-            return self._get_example_data_rest(table_name, number_of_rows)
-        else:
-            return super().get_example_data(table_name, number_of_rows)
-
-    def _get_example_data_rest(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
-        """Get example data using REST API"""
-        try:
-            from supabase import create_client
-
-            supabase = create_client(self.supabase_url, self.api_key)
-
-            # Get data from REST API
-            result = supabase.table(table_name).select('*').limit(number_of_rows).execute()
-
-            # Convert to the expected format
-            example_data = {}
-            if result.data:
-                for key in result.data[0].keys():
-                    example_data[key] = [row.get(key) for row in result.data]
-
-            return example_data
-
-        except ImportError:
-            raise RuntimeError("supabase-py package is required for REST API mode")
-        except Exception as e:
-            logger.error(f"Error getting example data via REST API: {e}")
-            return {}
thoth_dbmanager/plugins/mysql.py
DELETED
@@ -1,408 +0,0 @@
-"""
-MySQL plugin for Thoth SQL Database Manager.
-Unified implementation combining plugin architecture with full database functionality.
-"""
-
-import logging
-import os
-from pathlib import Path
-from threading import Lock
-from typing import Any, Dict, List, Optional, Union
-
-from sqlalchemy import create_engine, inspect, text
-from sqlalchemy.exc import SQLAlchemyError
-
-from ..core.interfaces import DbPlugin, DbAdapter
-from ..core.registry import register_plugin
-from ..documents import TableDocument, ColumnDocument, ForeignKeyDocument, SchemaDocument, IndexDocument
-
-logger = logging.getLogger(__name__)
-
-
-class MySQLAdapter(DbAdapter):
-    """MySQL database adapter with full functionality."""
-
-    def __init__(self, connection_params: Dict[str, Any]):
-        super().__init__(connection_params)
-        self.engine = None
-        self.host = connection_params.get('host')
-        self.port = connection_params.get('port', 3306)
-        self.dbname = connection_params.get('database') or connection_params.get('dbname')
-        self.user = connection_params.get('user') or connection_params.get('username')
-        self.password = connection_params.get('password')
-
-    def connect(self) -> None:
-        """Establish database connection."""
-        try:
-            connection_string = f"mysql+mysqlconnector://{self.user}:{self.password}@{self.host}:{self.port}/{self.dbname}"
-            self.engine = create_engine(connection_string, pool_pre_ping=True)
-            self.connection = self.engine
-            self._initialized = True
-        except Exception as e:
-            raise ConnectionError(f"Failed to connect to MySQL: {e}")
-
-    def disconnect(self) -> None:
-        """Close database connection."""
-        if self.engine:
-            self.engine.dispose()
-            self.engine = None
-            self.connection = None
-
-    def execute_query(self, query: str, params: Optional[Dict] = None, fetch: Union[str, int] = "all", timeout: int = 60) -> Any:
-        """Execute a query and return results."""
-        if not self.engine:
-            self.connect()
-
-        with self.engine.connect() as connection:
-            try:
-                if params:
-                    result = connection.execute(text(query), params)
-                else:
-                    result = connection.execute(text(query))
-
-                # Check if this is a query that returns rows (SELECT, SHOW, etc.)
-                query_upper = query.strip().upper()
-                if query_upper.startswith(('SELECT', 'SHOW', 'DESCRIBE', 'DESC', 'EXPLAIN', 'WITH')):
-                    if fetch == "all":
-                        return [row._asdict() for row in result.fetchall()]
-                    elif fetch == "one":
-                        row = result.fetchone()
-                        return row._asdict() if row else None
-                    elif isinstance(fetch, int) and fetch > 0:
-                        return [row._asdict() for row in result.fetchmany(fetch)]
-                    else:
-                        return [row._asdict() for row in result.fetchall()]
-                else:
-                    # For DDL/DML queries (CREATE, INSERT, UPDATE, DELETE), return rowcount
-                    connection.commit()
-                    return result.rowcount
-            except SQLAlchemyError as e:
-                logger.error(f"Error executing SQL: {str(e)}")
-                raise e
-
-    def get_tables_as_documents(self) -> List[TableDocument]:
-        """Return tables as document objects."""
-        inspector = inspect(self.engine)
-        table_names = inspector.get_table_names()
-        tables = []
-
-        for table_name in table_names:
-            try:
-                table_comment = inspector.get_table_comment(table_name).get('text', '')
-            except SQLAlchemyError:
-                table_comment = ''
-
-            tables.append(TableDocument(
-                table_name=table_name,
-                schema_name="",  # MySQL doesn't have explicit schemas like PostgreSQL
-                comment=table_comment or "",
-                row_count=None  # Could be populated if needed
-            ))
-
-        return tables
-
-    def get_columns_as_documents(self, table_name: str) -> List[ColumnDocument]:
-        """Return columns as document objects."""
-        inspector = inspect(self.engine)
-        columns_metadata = inspector.get_columns(table_name)
-        pk_columns = inspector.get_pk_constraint(table_name).get('constrained_columns', [])
-
-        columns = []
-        for col_meta in columns_metadata:
-            columns.append(ColumnDocument(
-                table_name=table_name,
-                column_name=col_meta['name'],
-                data_type=str(col_meta['type']),
-                is_nullable=col_meta.get('nullable', True),
-                is_pk=col_meta['name'] in pk_columns,
-                comment=col_meta.get('comment', '') or ""
-            ))
-
-        return columns
-
-    def get_foreign_keys_as_documents(self) -> List[ForeignKeyDocument]:
-        """Return foreign keys as document objects."""
-        inspector = inspect(self.engine)
-        all_foreign_keys = []
-
-        for table_name in inspector.get_table_names():
-            fks = inspector.get_foreign_keys(table_name)
-            for fk in fks:
-                all_foreign_keys.append(ForeignKeyDocument(
-                    source_table_name=table_name,
-                    source_column_name=fk['constrained_columns'][0],
-                    target_table_name=fk['referred_table'],
-                    target_column_name=fk['referred_columns'][0],
-                    constraint_name=fk.get('name', '')
-                ))
-
-        return all_foreign_keys
-
-    def get_schemas_as_documents(self) -> List[SchemaDocument]:
-        """Return schemas as document objects."""
-        # MySQL doesn't have explicit schemas like PostgreSQL
-        return [SchemaDocument(
-            schema_name="default",
-            comment="Default MySQL schema"
-        )]
-
-    def get_indexes_as_documents(self, table_name: Optional[str] = None) -> List[IndexDocument]:
-        """Return indexes as document objects."""
-        inspector = inspect(self.engine)
-        indexes = []
-
-        tables = [table_name] if table_name else inspector.get_table_names()
-
-        for tbl_name in tables:
-            try:
-                table_indexes = inspector.get_indexes(tbl_name)
-                for idx in table_indexes:
-                    indexes.append(IndexDocument(
-                        table_name=tbl_name,
-                        index_name=idx['name'],
-                        column_names=idx['column_names'],
-                        is_unique=idx['unique'],
-                        index_type="BTREE"  # Default for MySQL
-                    ))
-            except SQLAlchemyError as e:
-                logger.warning(f"Could not get indexes for table {tbl_name}: {e}")
-
-        return indexes
-
-    def get_unique_values(self) -> Dict[str, Dict[str, List[str]]]:
-        """Get unique values from the database."""
-        # This is a placeholder implementation.
-        # A more sophisticated version like in ThothPgManager should be implemented.
-        return {}
-
-    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
-        """Get example data (most frequent values) for each column in a table."""
-        inspector = inspect(self.engine)
-        try:
-            columns = inspector.get_columns(table_name)
-        except SQLAlchemyError as e:
-            logger.error(f"Error inspecting columns for table {table_name}: {e}")
-            raise e
-
-        if not columns:
-            logger.warning(f"No columns found for table {table_name}")
-            return {}
-
-        most_frequent_values: Dict[str, List[Any]] = {}
-
-        with self.engine.connect() as connection:
-            for col_info in columns:
-                column_name = col_info['name']
-                # MySQL uses backticks for identifier quoting
-                quoted_column_name = f'`{column_name}`'
-                quoted_table_name = f'`{table_name}`'
-
-                query_str = f"""
-                    SELECT {quoted_column_name}
-                    FROM (
-                        SELECT {quoted_column_name}, COUNT(*) as _freq
-                        FROM {quoted_table_name}
-                        WHERE {quoted_column_name} IS NOT NULL
-                        GROUP BY {quoted_column_name}
-                        ORDER BY _freq DESC
-                        LIMIT :num_rows
-                    ) as subquery;
-                """
-                try:
-                    result = connection.execute(text(query_str), {"num_rows": number_of_rows})
-                    values = [row[0] for row in result]
-                    most_frequent_values[column_name] = values
-                except SQLAlchemyError as e:
-                    logger.error(f"Error fetching frequent values for {column_name} in {table_name}: {e}")
-                    most_frequent_values[column_name] = []
-
-        # Normalize list lengths
-        max_length = 0
-        if most_frequent_values:
-            max_length = max(len(v) for v in most_frequent_values.values()) if most_frequent_values else 0
-
-        for column_name in most_frequent_values:
-            current_len = len(most_frequent_values[column_name])
-            if current_len < max_length:
-                most_frequent_values[column_name].extend([None] * (max_length - current_len))
-
-        return most_frequent_values
-
-
-@register_plugin("mysql")
-class MySQLPlugin(DbPlugin):
-    """MySQL database plugin with full functionality."""
-
-    plugin_name = "MySQL Plugin"
-    plugin_version = "1.0.0"
-    supported_db_types = ["mysql"]
-    required_dependencies = ["mysql-connector-python", "SQLAlchemy"]
-
-    _instances = {}
-    _lock = Lock()
-
-    def __init__(self, db_root_path: str, db_mode: str = "dev", **kwargs):
-        super().__init__(db_root_path, db_mode, **kwargs)
-        self.db_id = None
-        self.db_directory_path = None
-        self.host = None
-        self.port = None
-        self.dbname = None
-        self.user = None
-        self.password = None
-
-        # LSH manager integration (for backward compatibility)
-        self._lsh_manager = None
-
-    @classmethod
-    def get_instance(cls, host: str, port: int, dbname: str, user: str, password: str,
-                     db_root_path: str, db_mode: str = "dev", **kwargs):
-        """Get or create a singleton instance based on connection parameters."""
-        required_params = ['host', 'port', 'dbname', 'user', 'password', 'db_root_path']
-
-        all_params = {
-            'host': host,
-            'port': port,
-            'dbname': dbname,
-            'user': user,
-            'password': password,
-            'db_root_path': db_root_path,
-            'db_mode': db_mode,
-            **kwargs
-        }
-
-        missing_params = [param for param in required_params if all_params.get(param) is None]
-        if missing_params:
-            raise ValueError(f"Missing required parameter{'s' if len(missing_params) > 1 else ''}: {', '.join(missing_params)}")
-
-        with cls._lock:
-            instance_key = (host, port, dbname, user, password, db_root_path, db_mode)
-
-            if instance_key not in cls._instances:
-                instance = cls(db_root_path=db_root_path, db_mode=db_mode, **all_params)
-                instance.initialize(**all_params)
-                cls._instances[instance_key] = instance
-
-            return cls._instances[instance_key]
-
-    def create_adapter(self, **kwargs) -> DbAdapter:
-        """Create and return a MySQL adapter instance."""
-        return MySQLAdapter(kwargs)
-
-    def validate_connection_params(self, **kwargs) -> bool:
-        """Validate connection parameters for MySQL."""
-        required = ['host', 'port', 'user', 'password']
-        database = kwargs.get('database') or kwargs.get('dbname')
-
-        if not database:
-            logger.error("Either 'database' or 'dbname' is required for MySQL")
-            return False
-
-        for param in required:
-            if param not in kwargs:
-                logger.error(f"Missing required parameter: {param}")
-                return False
-
-        port = kwargs.get('port')
-        if not isinstance(port, int) or not (1 <= port <= 65535):
-            logger.error("port must be an integer between 1 and 65535")
-            return False
-
-        return True
-
-    def initialize(self, **kwargs) -> None:
-        """Initialize the MySQL plugin."""
-        # Validate and extract parameters
-        self.host = kwargs.get('host')
-        self.port = kwargs.get('port', 3306)
-        self.dbname = kwargs.get('database') or kwargs.get('dbname')
-        self.user = kwargs.get('user') or kwargs.get('username')
-        self.password = kwargs.get('password')
-
-        # Set additional attributes
-        for key, value in kwargs.items():
-            if key not in ['host', 'port', 'database', 'dbname', 'user', 'username', 'password']:
-                setattr(self, key, value)
-
-        # Initialize with updated kwargs
-        super().initialize(**kwargs)
-
-        # Set up database directory path and ID
-        self.db_id = self.dbname
-        self._setup_directory_path(self.db_id)
-
-        logger.info(f"MySQL plugin initialized for database: {self.db_id} at {self.host}:{self.port}")
-
-    def _setup_directory_path(self, db_id: str) -> None:
-        """Set up the database directory path."""
-        if isinstance(self.db_root_path, str):
-            self.db_root_path = Path(self.db_root_path)
-
-        self.db_directory_path = Path(self.db_root_path) / f"{self.db_mode}_databases" / db_id
-        self.db_id = db_id
-
-        # Reset LSH manager when directory path changes
-        self._lsh_manager = None
-
-    @property
-    def lsh_manager(self):
-        """Lazy load LSH manager for backward compatibility."""
-        if self._lsh_manager is None and self.db_directory_path:
-            from ..lsh.manager import LshManager
-            self._lsh_manager = LshManager(self.db_directory_path)
-        return self._lsh_manager
-
-    # LSH integration methods for backward compatibility
-    def set_lsh(self) -> str:
-        """Set LSH for backward compatibility."""
-        try:
-            if self.lsh_manager and self.lsh_manager.load_lsh():
-                return "success"
-            else:
-                return "error"
-        except Exception as e:
-            logger.error(f"Error loading LSH: {e}")
-            return "error"
-
-    def query_lsh(self, keyword: str, signature_size: int = 30, n_gram: int = 3, top_n: int = 10) -> Dict[str, Dict[str, List[str]]]:
-        """Query LSH for backward compatibility."""
-        if self.lsh_manager:
-            try:
-                return self.lsh_manager.query(
-                    keyword=keyword,
-                    signature_size=signature_size,
-                    n_gram=n_gram,
-                    top_n=top_n
-                )
-            except Exception as e:
-                logger.error(f"LSH query failed: {e}")
-                raise Exception(f"Error querying LSH for {self.db_id}: {e}")
-        else:
-            raise Exception(f"LSH not available for {self.db_id}")
-
-    def get_connection_info(self) -> Dict[str, Any]:
-        """Get connection information."""
-        base_info = super().get_plugin_info()
-
-        if self.adapter:
-            adapter_info = self.adapter.get_connection_info()
-            base_info.update(adapter_info)
-
-        base_info.update({
-            "db_id": self.db_id,
-            "host": self.host,
-            "port": self.port,
-            "database": self.dbname,
-            "user": self.user,
-            "db_directory_path": str(self.db_directory_path) if self.db_directory_path else None,
-            "lsh_available": self.lsh_manager is not None
-        })
-
-        return base_info
-
-    def get_example_data(self, table_name: str, number_of_rows: int = 30) -> Dict[str, List[Any]]:
-        """Get example data through adapter."""
-        if self.adapter:
-            return self.adapter.get_example_data(table_name, number_of_rows)
-        else:
-            raise RuntimeError("Plugin not initialized")