structured2graph 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +47 -0
- core/__init__.py +23 -0
- core/hygm/__init__.py +74 -0
- core/hygm/hygm.py +2351 -0
- core/hygm/models/__init__.py +82 -0
- core/hygm/models/graph_models.py +667 -0
- core/hygm/models/llm_models.py +229 -0
- core/hygm/models/operations.py +176 -0
- core/hygm/models/sources.py +68 -0
- core/hygm/models/user_operations.py +139 -0
- core/hygm/strategies/__init__.py +17 -0
- core/hygm/strategies/base.py +36 -0
- core/hygm/strategies/deterministic.py +262 -0
- core/hygm/strategies/llm.py +904 -0
- core/hygm/validation/__init__.py +38 -0
- core/hygm/validation/base.py +194 -0
- core/hygm/validation/graph_schema_validator.py +687 -0
- core/hygm/validation/memgraph_data_validator.py +991 -0
- core/migration_agent.py +1369 -0
- core/schema/spec.json +155 -0
- core/utils/meta_graph.py +108 -0
- database/__init__.py +36 -0
- database/adapters/__init__.py +11 -0
- database/adapters/memgraph.py +318 -0
- database/adapters/mysql.py +311 -0
- database/adapters/postgresql.py +335 -0
- database/analyzer.py +396 -0
- database/factory.py +219 -0
- database/models.py +209 -0
- main.py +518 -0
- query_generation/__init__.py +20 -0
- query_generation/cypher_generator.py +129 -0
- query_generation/schema_utilities.py +88 -0
- structured2graph-0.1.1.dist-info/METADATA +197 -0
- structured2graph-0.1.1.dist-info/RECORD +41 -0
- structured2graph-0.1.1.dist-info/WHEEL +4 -0
- structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
- structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
- utils/__init__.py +57 -0
- utils/config.py +235 -0
- utils/environment.py +404 -0
database/analyzer.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Abstract analyzer interface for database systems.
|
|
3
|
+
|
|
4
|
+
This module defines the abstract base class that all database analyzers
|
|
5
|
+
must implement to ensure compatibility with the migration system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import Dict, List, Any, Optional
|
|
10
|
+
from .models import (
|
|
11
|
+
DatabaseStructure,
|
|
12
|
+
TableInfo,
|
|
13
|
+
ColumnInfo,
|
|
14
|
+
ForeignKeyInfo,
|
|
15
|
+
RelationshipInfo,
|
|
16
|
+
TableType,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DatabaseAnalyzer(ABC):
|
|
21
|
+
"""
|
|
22
|
+
Abstract base class for database analyzers.
|
|
23
|
+
|
|
24
|
+
All database-specific analyzers must implement this interface to ensure
|
|
25
|
+
compatibility with HyGM and the migration system.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, connection_config: Dict[str, Any]):
|
|
29
|
+
"""
|
|
30
|
+
Initialize the database analyzer.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
connection_config: Database-specific connection configuration
|
|
34
|
+
"""
|
|
35
|
+
self.connection_config = connection_config
|
|
36
|
+
self.connection = None
|
|
37
|
+
self.database_type = self._get_database_type()
|
|
38
|
+
|
|
39
|
+
@abstractmethod
|
|
40
|
+
def _get_database_type(self) -> str:
|
|
41
|
+
"""Return the type of database (e.g., 'mysql', 'postgresql')."""
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
def connect(self) -> bool:
|
|
46
|
+
"""
|
|
47
|
+
Establish connection to the database.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
True if connection successful, False otherwise
|
|
51
|
+
"""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
@abstractmethod
|
|
55
|
+
def disconnect(self) -> None:
|
|
56
|
+
"""Close the database connection."""
|
|
57
|
+
pass
|
|
58
|
+
|
|
59
|
+
@abstractmethod
|
|
60
|
+
def get_tables(self) -> List[str]:
|
|
61
|
+
"""
|
|
62
|
+
Get list of all tables in the database.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
List of table names
|
|
66
|
+
"""
|
|
67
|
+
pass
|
|
68
|
+
|
|
69
|
+
@abstractmethod
|
|
70
|
+
def get_table_schema(self, table_name: str) -> List[ColumnInfo]:
|
|
71
|
+
"""
|
|
72
|
+
Get schema information for a specific table.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
table_name: Name of the table
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
List of ColumnInfo objects describing the table schema
|
|
79
|
+
"""
|
|
80
|
+
pass
|
|
81
|
+
|
|
82
|
+
@abstractmethod
|
|
83
|
+
def get_foreign_keys(self, table_name: str) -> List[ForeignKeyInfo]:
|
|
84
|
+
"""
|
|
85
|
+
Get foreign key relationships for a table.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
table_name: Name of the table
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
List of ForeignKeyInfo objects
|
|
92
|
+
"""
|
|
93
|
+
pass
|
|
94
|
+
|
|
95
|
+
@abstractmethod
|
|
96
|
+
def get_table_data(
|
|
97
|
+
self, table_name: str, limit: Optional[int] = None
|
|
98
|
+
) -> List[Dict[str, Any]]:
|
|
99
|
+
"""
|
|
100
|
+
Get data from a specific table.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
table_name: Name of the table
|
|
104
|
+
limit: Maximum number of rows to return
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
List of dictionaries representing rows
|
|
108
|
+
"""
|
|
109
|
+
pass
|
|
110
|
+
|
|
111
|
+
@abstractmethod
|
|
112
|
+
def get_table_row_count(self, table_name: str) -> int:
|
|
113
|
+
"""
|
|
114
|
+
Get the number of rows in a table.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
table_name: Name of the table
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
Number of rows in the table
|
|
121
|
+
"""
|
|
122
|
+
pass
|
|
123
|
+
|
|
124
|
+
@abstractmethod
|
|
125
|
+
def is_view(self, table_name: str) -> bool:
|
|
126
|
+
"""
|
|
127
|
+
Check if a table is actually a view.
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
table_name: Name of the table
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
True if the table is a view, False otherwise
|
|
134
|
+
"""
|
|
135
|
+
pass
|
|
136
|
+
|
|
137
|
+
@abstractmethod
|
|
138
|
+
def get_indexes(self, table_name: str) -> List[Dict[str, Any]]:
|
|
139
|
+
"""
|
|
140
|
+
Get indexes for a specific table.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
table_name: Name of the table
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
List of dictionaries representing the table's indexes
|
|
147
|
+
"""
|
|
148
|
+
pass
|
|
149
|
+
|
|
150
|
+
def is_connected(self) -> bool:
|
|
151
|
+
"""
|
|
152
|
+
Check if the database connection is active.
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
True if connected, False otherwise
|
|
156
|
+
"""
|
|
157
|
+
return self.connection is not None
|
|
158
|
+
|
|
159
|
+
def get_connection_info(self) -> Dict[str, Any]:
|
|
160
|
+
"""
|
|
161
|
+
Get connection information (excluding sensitive data like passwords).
|
|
162
|
+
|
|
163
|
+
Returns:
|
|
164
|
+
Dictionary with connection information
|
|
165
|
+
"""
|
|
166
|
+
safe_config = self.connection_config.copy()
|
|
167
|
+
if "password" in safe_config:
|
|
168
|
+
safe_config["password"] = "***"
|
|
169
|
+
return {
|
|
170
|
+
"database_type": self.database_type,
|
|
171
|
+
"config": safe_config,
|
|
172
|
+
"connected": self.is_connected(),
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
def get_migration_config(self) -> Dict[str, str]:
|
|
176
|
+
"""
|
|
177
|
+
Get connection config formatted for migration tools.
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
Dictionary with string values suitable for migration tools
|
|
181
|
+
"""
|
|
182
|
+
config = self.connection_config.copy()
|
|
183
|
+
|
|
184
|
+
# Ensure all values are strings for compatibility
|
|
185
|
+
migration_config = {}
|
|
186
|
+
for key, value in config.items():
|
|
187
|
+
if key == "password" and value is None:
|
|
188
|
+
migration_config[key] = ""
|
|
189
|
+
else:
|
|
190
|
+
migration_config[key] = str(value)
|
|
191
|
+
|
|
192
|
+
return migration_config
|
|
193
|
+
|
|
194
|
+
def is_join_table(self, table_info: TableInfo) -> bool:
|
|
195
|
+
"""
|
|
196
|
+
Determine if a table is a join table (many-to-many).
|
|
197
|
+
|
|
198
|
+
This implementation is database-agnostic and can be overridden
|
|
199
|
+
if database-specific logic is needed.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
table_info: TableInfo object
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
True if the table is a join table, False otherwise
|
|
206
|
+
"""
|
|
207
|
+
# A join table typically has:
|
|
208
|
+
# 1. Only foreign key columns (and maybe an ID or timestamp)
|
|
209
|
+
# 2. At least 2 foreign keys
|
|
210
|
+
# 3. Small number of total columns
|
|
211
|
+
|
|
212
|
+
if len(table_info.foreign_keys) < 2:
|
|
213
|
+
return False
|
|
214
|
+
|
|
215
|
+
# Count non-FK columns (excluding common metadata columns)
|
|
216
|
+
non_fk_columns = []
|
|
217
|
+
fk_column_names = {fk.column_name for fk in table_info.foreign_keys}
|
|
218
|
+
metadata_columns = {
|
|
219
|
+
"id",
|
|
220
|
+
"created_at",
|
|
221
|
+
"updated_at",
|
|
222
|
+
"created_on",
|
|
223
|
+
"updated_on",
|
|
224
|
+
"timestamp",
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
for col in table_info.columns:
|
|
228
|
+
field_name = col.name.lower()
|
|
229
|
+
if col.name not in fk_column_names and field_name not in metadata_columns:
|
|
230
|
+
non_fk_columns.append(col.name)
|
|
231
|
+
|
|
232
|
+
# If most columns are foreign keys, it's likely a join table
|
|
233
|
+
total_columns = len(table_info.columns)
|
|
234
|
+
fk_ratio = len(table_info.foreign_keys) / total_columns
|
|
235
|
+
|
|
236
|
+
# Consider it a join table if:
|
|
237
|
+
# - At least 2 FKs and FK ratio > 0.5, OR
|
|
238
|
+
# - All columns are FKs or metadata columns
|
|
239
|
+
return (len(table_info.foreign_keys) >= 2 and fk_ratio > 0.5) or len(
|
|
240
|
+
non_fk_columns
|
|
241
|
+
) == 0
|
|
242
|
+
|
|
243
|
+
def determine_table_type(self, table_info: TableInfo) -> TableType:
|
|
244
|
+
"""
|
|
245
|
+
Determine the type of table.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
table_info: TableInfo object
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
TableType enum value
|
|
252
|
+
"""
|
|
253
|
+
# Check if it's a view first
|
|
254
|
+
if self.is_view(table_info.name):
|
|
255
|
+
return TableType.VIEW
|
|
256
|
+
|
|
257
|
+
if self.is_join_table(table_info):
|
|
258
|
+
return TableType.JOIN
|
|
259
|
+
elif len(table_info.foreign_keys) == 0:
|
|
260
|
+
return TableType.ENTITY # Pure entity table with no references
|
|
261
|
+
else:
|
|
262
|
+
return TableType.ENTITY # Entity table with references
|
|
263
|
+
|
|
264
|
+
def get_database_structure(self) -> DatabaseStructure:
|
|
265
|
+
"""
|
|
266
|
+
Get complete database structure including tables, schemas, and relationships.
|
|
267
|
+
|
|
268
|
+
This method provides a standardized database structure that works
|
|
269
|
+
with HyGM regardless of the underlying database system.
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
DatabaseStructure object containing all database information
|
|
273
|
+
"""
|
|
274
|
+
tables = {}
|
|
275
|
+
entity_tables = {}
|
|
276
|
+
join_tables = {}
|
|
277
|
+
view_tables = {}
|
|
278
|
+
relationships = []
|
|
279
|
+
sample_data = {}
|
|
280
|
+
table_counts = {}
|
|
281
|
+
|
|
282
|
+
# Get all tables
|
|
283
|
+
all_table_names = self.get_tables()
|
|
284
|
+
|
|
285
|
+
# First pass: collect table information
|
|
286
|
+
for table_name in all_table_names:
|
|
287
|
+
columns = self.get_table_schema(table_name)
|
|
288
|
+
foreign_keys = self.get_foreign_keys(table_name)
|
|
289
|
+
row_count = self.get_table_row_count(table_name)
|
|
290
|
+
|
|
291
|
+
# Get primary keys
|
|
292
|
+
primary_keys = [col.name for col in columns if col.is_primary_key]
|
|
293
|
+
|
|
294
|
+
# Get indexes for this table
|
|
295
|
+
try:
|
|
296
|
+
table_indexes = self.get_indexes(table_name)
|
|
297
|
+
except (NotImplementedError, AttributeError):
|
|
298
|
+
# If get_indexes is not implemented, use empty list
|
|
299
|
+
table_indexes = []
|
|
300
|
+
|
|
301
|
+
# Create TableInfo object
|
|
302
|
+
table_info = TableInfo(
|
|
303
|
+
name=table_name,
|
|
304
|
+
table_type=TableType.ENTITY, # Will be determined later
|
|
305
|
+
columns=columns,
|
|
306
|
+
foreign_keys=foreign_keys,
|
|
307
|
+
row_count=row_count,
|
|
308
|
+
primary_keys=primary_keys,
|
|
309
|
+
indexes=table_indexes,
|
|
310
|
+
)
|
|
311
|
+
|
|
312
|
+
# Determine table type
|
|
313
|
+
table_info.table_type = self.determine_table_type(table_info)
|
|
314
|
+
|
|
315
|
+
tables[table_name] = table_info
|
|
316
|
+
table_counts[table_name] = row_count
|
|
317
|
+
|
|
318
|
+
# Categorize tables
|
|
319
|
+
if table_info.table_type == TableType.VIEW:
|
|
320
|
+
view_tables[table_name] = table_info
|
|
321
|
+
elif table_info.table_type == TableType.JOIN:
|
|
322
|
+
join_tables[table_name] = table_info
|
|
323
|
+
else:
|
|
324
|
+
entity_tables[table_name] = table_info
|
|
325
|
+
|
|
326
|
+
# Get sample data (limit to 3 rows for performance)
|
|
327
|
+
try:
|
|
328
|
+
sample_data[table_name] = self.get_table_data(table_name, limit=3)
|
|
329
|
+
except Exception:
|
|
330
|
+
sample_data[table_name] = []
|
|
331
|
+
|
|
332
|
+
# Second pass: create relationships
|
|
333
|
+
for table_name, table_info in tables.items():
|
|
334
|
+
if table_info.table_type == TableType.JOIN:
|
|
335
|
+
# Handle join tables as many-to-many relationships
|
|
336
|
+
fks = table_info.foreign_keys
|
|
337
|
+
if len(fks) >= 2:
|
|
338
|
+
# Create a many-to-many relationship
|
|
339
|
+
fk1, fk2 = fks[0], fks[1]
|
|
340
|
+
|
|
341
|
+
# Get additional properties from non-FK columns
|
|
342
|
+
fk_columns = {fk.column_name for fk in fks}
|
|
343
|
+
additional_properties = []
|
|
344
|
+
metadata_columns = {
|
|
345
|
+
"id",
|
|
346
|
+
"created_at",
|
|
347
|
+
"updated_at",
|
|
348
|
+
"created_on",
|
|
349
|
+
"updated_on",
|
|
350
|
+
"timestamp",
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
for col in table_info.columns:
|
|
354
|
+
if (
|
|
355
|
+
col.name not in fk_columns
|
|
356
|
+
and col.name.lower() not in metadata_columns
|
|
357
|
+
):
|
|
358
|
+
additional_properties.append(col.name)
|
|
359
|
+
|
|
360
|
+
relationships.append(
|
|
361
|
+
RelationshipInfo(
|
|
362
|
+
relationship_type="many_to_many",
|
|
363
|
+
from_table=fk1.referenced_table,
|
|
364
|
+
from_column=fk1.referenced_column,
|
|
365
|
+
to_table=fk2.referenced_table,
|
|
366
|
+
to_column=fk2.referenced_column,
|
|
367
|
+
join_table=table_name,
|
|
368
|
+
join_from_column=fk1.column_name,
|
|
369
|
+
join_to_column=fk2.column_name,
|
|
370
|
+
additional_properties=additional_properties,
|
|
371
|
+
)
|
|
372
|
+
)
|
|
373
|
+
else:
|
|
374
|
+
# Handle regular foreign key relationships
|
|
375
|
+
for fk in table_info.foreign_keys:
|
|
376
|
+
relationships.append(
|
|
377
|
+
RelationshipInfo(
|
|
378
|
+
relationship_type="one_to_many",
|
|
379
|
+
from_table=table_name,
|
|
380
|
+
from_column=fk.column_name,
|
|
381
|
+
to_table=fk.referenced_table,
|
|
382
|
+
to_column=fk.referenced_column,
|
|
383
|
+
)
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
return DatabaseStructure(
|
|
387
|
+
tables=tables,
|
|
388
|
+
entity_tables=entity_tables,
|
|
389
|
+
join_tables=join_tables,
|
|
390
|
+
view_tables=view_tables,
|
|
391
|
+
relationships=relationships,
|
|
392
|
+
sample_data=sample_data,
|
|
393
|
+
table_counts=table_counts,
|
|
394
|
+
database_name=self.connection_config.get("database", "unknown"),
|
|
395
|
+
database_type=self.database_type,
|
|
396
|
+
)
|
database/factory.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database analyzer factory for creating database-specific analyzers.
|
|
3
|
+
|
|
4
|
+
This module provides a factory pattern for creating appropriate database
|
|
5
|
+
analyzers based on the database type or connection parameters.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Dict, Type
|
|
9
|
+
from .analyzer import DatabaseAnalyzer
|
|
10
|
+
from .adapters.mysql import MySQLAnalyzer
|
|
11
|
+
from .adapters.postgresql import PostgreSQLAnalyzer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DatabaseAnalyzerFactory:
    """Factory for creating database-specific analyzers."""

    # Registry of available analyzers.  Third-party database types can be
    # added at runtime via register_analyzer().
    _analyzers: Dict[str, Type[DatabaseAnalyzer]] = {
        "mysql": MySQLAnalyzer,
        "postgresql": PostgreSQLAnalyzer,
        # Future database types can be added here:
        # "duckdb": DuckDBAnalyzer,
        # "oracle": OracleAnalyzer,
        # "sqlserver": SQLServerAnalyzer,
    }

    @classmethod
    def create_analyzer(
        cls, database_type: str, **connection_params
    ) -> DatabaseAnalyzer:
        """
        Create a database analyzer for the specified database type.

        Args:
            database_type: Type of database (mysql, postgresql, etc.)
            **connection_params: Database-specific connection parameters

        Returns:
            DatabaseAnalyzer instance

        Raises:
            ValueError: If database type is not supported
        """
        database_type = database_type.lower()

        if database_type not in cls._analyzers:
            supported_types = ", ".join(cls._analyzers.keys())
            raise ValueError(
                f"Unsupported database type: {database_type}. "
                f"Supported types: {supported_types}"
            )

        # Built-in types get defaulted connection parameters.
        if database_type == "mysql":
            return MySQLAnalyzer(
                host=connection_params.get("host", "localhost"),
                user=connection_params.get("user", "root"),
                password=connection_params.get("password", ""),
                database=connection_params.get("database") or "",
                port=connection_params.get("port", 3306),
            )
        if database_type == "postgresql":
            return PostgreSQLAnalyzer(
                host=connection_params.get("host", "localhost"),
                user=connection_params.get("user", "postgres"),
                password=connection_params.get("password", ""),
                database=connection_params.get("database") or "",
                port=connection_params.get("port", 5432),
                schema=connection_params.get("schema", "public"),
            )

        # BUG FIX: analyzer classes added through register_analyzer() used
        # to fall through to an unconditional ValueError here, making the
        # registration hook unusable.  Instantiate them from the registry,
        # forwarding the caller-supplied connection parameters as-is.
        return cls._analyzers[database_type](**connection_params)

    @classmethod
    def create_from_uri(cls, database_uri: str) -> DatabaseAnalyzer:
        """
        Create a database analyzer from a database URI.

        Args:
            database_uri: Database connection URI
                (e.g., mysql://user:pass@host/db)

        Returns:
            DatabaseAnalyzer instance

        Raises:
            ValueError: If URI format is invalid or database type unsupported
        """
        try:
            # Parse the URI.
            if "://" not in database_uri:
                raise ValueError("Invalid URI format: missing protocol")

            protocol, rest = database_uri.split("://", 1)
            database_type = protocol.lower()

            # Parse connection parameters based on database type.
            if database_type == "mysql":
                return cls._parse_mysql_uri(rest)
            if database_type == "postgresql":
                return cls._parse_postgresql_uri(rest)
            # elif database_type == "duckdb":
            #     return cls._parse_duckdb_uri(rest)
            raise ValueError(f"Unsupported database type in URI: {database_type}")

        except Exception as e:
            # Normalize every failure (bad format, bad port number,
            # unsupported type) into a single ValueError for callers.
            raise ValueError(f"Failed to parse database URI: {e}") from e

    @classmethod
    def _parse_mysql_uri(cls, uri_part: str) -> MySQLAnalyzer:
        """Parse a MySQL URI body and create an analyzer.

        Expected format: ``user[:password]@host[:port]/database``.

        NOTE(review): credentials are not percent-decoded and IPv6 host
        literals ("[::1]:3306") are not handled; urllib.parse.urlsplit
        would cover both — confirm before changing behavior.
        """
        if "@" not in uri_part:
            raise ValueError("Invalid MySQL URI: missing credentials")

        credentials, host_db = uri_part.split("@", 1)

        # Parse credentials (password is optional).
        if ":" in credentials:
            user, password = credentials.split(":", 1)
        else:
            user = credentials
            password = ""

        # Parse host, port, and database.
        if "/" not in host_db:
            raise ValueError("Invalid MySQL URI: missing database name")

        host_port, database = host_db.rsplit("/", 1)

        if ":" in host_port:
            host, port_str = host_port.split(":", 1)
            port = int(port_str)
        else:
            host = host_port
            port = 3306  # MySQL default port

        return MySQLAnalyzer(
            host=host,
            user=user,
            password=password,
            database=database,
            port=port,
        )

    @classmethod
    def _parse_postgresql_uri(cls, uri_part: str) -> PostgreSQLAnalyzer:
        """Parse a PostgreSQL URI body and create an analyzer.

        Expected format:
        ``user[:password]@host[:port]/database[?schema=name]``.

        NOTE(review): credentials are not percent-decoded and IPv6 host
        literals are not handled; urllib.parse.urlsplit would cover both —
        confirm before changing behavior.
        """
        if "@" not in uri_part:
            raise ValueError("Invalid PostgreSQL URI: missing credentials")

        credentials, host_db = uri_part.split("@", 1)

        if ":" in credentials:
            user, password = credentials.split(":", 1)
        else:
            user = credentials
            password = ""

        if "/" not in host_db:
            raise ValueError("Invalid PostgreSQL URI: missing database name")

        host_port, database = host_db.rsplit("/", 1)

        # Optional ?schema=... query parameter selects a non-default schema.
        schema = "public"
        if "?" in database:
            database, query = database.split("?", 1)
            for part in query.split("&"):
                if part.startswith("schema="):
                    schema = part.split("=", 1)[1] or "public"

        if ":" in host_port:
            host, port_str = host_port.split(":", 1)
            port = int(port_str)
        else:
            host = host_port
            port = 5432  # PostgreSQL default port

        return PostgreSQLAnalyzer(
            host=host,
            user=user,
            password=password,
            database=database,
            port=port,
            schema=schema,
        )

    @classmethod
    def get_supported_databases(cls) -> list[str]:
        """
        Get list of supported database types.

        Returns:
            List of supported database type strings
        """
        return list(cls._analyzers.keys())

    @classmethod
    def register_analyzer(
        cls, database_type: str, analyzer_class: Type[DatabaseAnalyzer]
    ) -> None:
        """
        Register a new database analyzer.

        This allows for extending the factory with new database types
        without modifying the core factory code.  Registered types are
        instantiated by create_analyzer() with the raw connection
        parameters.

        Args:
            database_type: String identifier for the database type
            analyzer_class: DatabaseAnalyzer subclass for this database type
        """
        cls._analyzers[database_type.lower()] = analyzer_class
|