structured2graph 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +47 -0
- core/__init__.py +23 -0
- core/hygm/__init__.py +74 -0
- core/hygm/hygm.py +2351 -0
- core/hygm/models/__init__.py +82 -0
- core/hygm/models/graph_models.py +667 -0
- core/hygm/models/llm_models.py +229 -0
- core/hygm/models/operations.py +176 -0
- core/hygm/models/sources.py +68 -0
- core/hygm/models/user_operations.py +139 -0
- core/hygm/strategies/__init__.py +17 -0
- core/hygm/strategies/base.py +36 -0
- core/hygm/strategies/deterministic.py +262 -0
- core/hygm/strategies/llm.py +904 -0
- core/hygm/validation/__init__.py +38 -0
- core/hygm/validation/base.py +194 -0
- core/hygm/validation/graph_schema_validator.py +687 -0
- core/hygm/validation/memgraph_data_validator.py +991 -0
- core/migration_agent.py +1369 -0
- core/schema/spec.json +155 -0
- core/utils/meta_graph.py +108 -0
- database/__init__.py +36 -0
- database/adapters/__init__.py +11 -0
- database/adapters/memgraph.py +318 -0
- database/adapters/mysql.py +311 -0
- database/adapters/postgresql.py +335 -0
- database/analyzer.py +396 -0
- database/factory.py +219 -0
- database/models.py +209 -0
- main.py +518 -0
- query_generation/__init__.py +20 -0
- query_generation/cypher_generator.py +129 -0
- query_generation/schema_utilities.py +88 -0
- structured2graph-0.1.1.dist-info/METADATA +197 -0
- structured2graph-0.1.1.dist-info/RECORD +41 -0
- structured2graph-0.1.1.dist-info/WHEEL +4 -0
- structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
- structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
- utils/__init__.py +57 -0
- utils/config.py +235 -0
- utils/environment.py +404 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema transformation utilities for SQL to graph migration.
|
|
3
|
+
Provides utilities for transforming SQL schema elements to graph equivalents.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SchemaUtilities:
|
|
12
|
+
"""Utilities for schema transformation and naming conventions."""
|
|
13
|
+
|
|
14
|
+
@staticmethod
|
|
15
|
+
def table_name_to_label(table_name: str) -> str:
|
|
16
|
+
"""Convert table name to Cypher node label.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
table_name: SQL table name (e.g., 'user_profiles')
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
Cypher label in PascalCase (e.g., 'UserProfiles')
|
|
23
|
+
"""
|
|
24
|
+
return "".join(word.capitalize() for word in table_name.split("_"))
|
|
25
|
+
|
|
26
|
+
@staticmethod
|
|
27
|
+
def column_name_to_property(column_name: str) -> str:
|
|
28
|
+
"""Convert column name to graph property name.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
column_name: SQL column name
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
Property name (currently unchanged, but extensible)
|
|
35
|
+
"""
|
|
36
|
+
return column_name
|
|
37
|
+
|
|
38
|
+
@staticmethod
|
|
39
|
+
def generate_relationship_name(to_table: str, join_table: str | None = None) -> str:
|
|
40
|
+
"""Generate semantic relationship name.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
to_table: Target table name
|
|
44
|
+
join_table: Join table name for many-to-many relationships
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Relationship name in UPPER_CASE
|
|
48
|
+
"""
|
|
49
|
+
if join_table:
|
|
50
|
+
return SchemaUtilities.table_name_to_label(join_table).upper()
|
|
51
|
+
else:
|
|
52
|
+
to_label = SchemaUtilities.table_name_to_label(to_table).upper()
|
|
53
|
+
return f"HAS_{to_label}"
|
|
54
|
+
|
|
55
|
+
@staticmethod
|
|
56
|
+
def is_metadata_column(column_name: str) -> bool:
|
|
57
|
+
"""Check if a column is a metadata/system column.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
column_name: Name of the column to check
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
True if it's a metadata column, False otherwise
|
|
64
|
+
"""
|
|
65
|
+
metadata_columns = {
|
|
66
|
+
"id",
|
|
67
|
+
"created_at",
|
|
68
|
+
"updated_at",
|
|
69
|
+
"created_on",
|
|
70
|
+
"updated_on",
|
|
71
|
+
"timestamp",
|
|
72
|
+
"version",
|
|
73
|
+
"deleted_at",
|
|
74
|
+
"modified_at",
|
|
75
|
+
}
|
|
76
|
+
return column_name.lower() in metadata_columns
|
|
77
|
+
|
|
78
|
+
@staticmethod
|
|
79
|
+
def is_foreign_key_column(column_name: str) -> bool:
|
|
80
|
+
"""Check if a column name suggests it's a foreign key.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
column_name: Name of the column to check
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
True if it appears to be a foreign key, False otherwise
|
|
87
|
+
"""
|
|
88
|
+
return column_name.lower().endswith("_id") and column_name.lower() != "id"
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: structured2graph
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Database migration agent from structured data (e.g. SQL) to graph.
|
|
5
|
+
Project-URL: Homepage, https://github.com/memgraph/ai-toolkit
|
|
6
|
+
Project-URL: Repository, https://github.com/memgraph/ai-toolkit
|
|
7
|
+
Project-URL: Issues, https://github.com/memgraph/ai-toolkit/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/memgraph/ai-toolkit/tree/main/agents/sql2graph
|
|
9
|
+
Author-email: Memgraph <tech@memgraph.com>
|
|
10
|
+
Maintainer-email: Memgraph <tech@memgraph.com>
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: database-migration,etl,graph-database,knowledge-graph,memgraph,sql-to-graph
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Database
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: anthropic>=0.40.0
|
|
26
|
+
Requires-Dist: langchain-anthropic>=0.3.0
|
|
27
|
+
Requires-Dist: langchain-core>=1.0.0
|
|
28
|
+
Requires-Dist: langchain-google-genai>=2.0.0
|
|
29
|
+
Requires-Dist: langchain-openai>=0.2.0
|
|
30
|
+
Requires-Dist: langchain>=1.0.0
|
|
31
|
+
Requires-Dist: langgraph>=0.2.0
|
|
32
|
+
Requires-Dist: memgraph-toolbox>=0.1.4
|
|
33
|
+
Requires-Dist: mysql-connector-python>=9.0.0
|
|
34
|
+
Requires-Dist: neo4j>=5.0.0
|
|
35
|
+
Requires-Dist: openai>=1.0.0
|
|
36
|
+
Requires-Dist: psycopg2-binary>=2.9
|
|
37
|
+
Requires-Dist: pydantic>=2.0.0
|
|
38
|
+
Requires-Dist: pymysql>=1.1.0
|
|
39
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
40
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# SQL Database to Graph Migration Agent
|
|
44
|
+
|
|
45
|
+
Intelligent database migration agent that transforms SQL databases (MySQL, PostgreSQL) into graph databases, powered by LLM analysis and LangGraph workflows.
|
|
46
|
+
|
|
47
|
+
## Overview
|
|
48
|
+
|
|
49
|
+
This package provides a sophisticated migration agent that:
|
|
50
|
+
|
|
51
|
+
- **Analyzes SQL database schemas** - Automatically discovers tables, relationships, and constraints
|
|
52
|
+
- **Generates optimal graph models** - Uses AI to create node and relationship structures
|
|
53
|
+
- **Creates indexes and constraints** - Ensures performance and data integrity
|
|
54
|
+
- **Handles complex relationships** - Converts foreign keys to graph relationships
|
|
55
|
+
- **Incremental refinement** - Review each table, adjust the model
|
|
56
|
+
immediately, then enter the interactive refinement loop once all tables
|
|
57
|
+
are processed
|
|
58
|
+
- **Comprehensive validation** - Verifies migration results and data integrity
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Install the package
|
|
64
|
+
uv pip install .
|
|
65
|
+
|
|
66
|
+
# Or install in development mode
|
|
67
|
+
uv pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Quick Start
|
|
71
|
+
|
|
72
|
+
Run the migration agent:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
uv run main
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
The agent will guide you through:
|
|
79
|
+
|
|
80
|
+
1. Environment setup and database connections
|
|
81
|
+
2. Graph modeling strategy selection
|
|
82
|
+
3. Automatic or incremental migration mode
|
|
83
|
+
4. Complete migration workflow with progress tracking
|
|
84
|
+
|
|
85
|
+
> **Incremental review:** The LLM now drafts the entire graph model in a single
|
|
86
|
+
> shot and then walks you through table-level changes detected since the last
|
|
87
|
+
> migration. You only need to approve (or tweak) the differences that matter.
|
|
88
|
+
|
|
89
|
+
You can also preconfigure the workflow using CLI flags or environment variables:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
uv run main --mode incremental --strategy llm --meta-graph reset --log-level DEBUG
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
| Option | Environment | Description |
|
|
96
|
+
| -------------------------------------- | -------------------- | ------------------------------------------------------------- |
|
|
97
|
+
| `--mode {automatic,incremental}` | `SQL2MG_MODE` | Selects automatic or incremental modeling flow. |
|
|
98
|
+
| `--strategy {deterministic,llm}` | `SQL2MG_STRATEGY` | Chooses deterministic or LLM-powered HyGM strategy. |
|
|
99
|
+
| `--provider {openai,anthropic,gemini}` | `LLM_PROVIDER` | Selects LLM provider (auto-detects if not specified). |
|
|
100
|
+
| `--model MODEL_NAME` | `LLM_MODEL` | Specifies LLM model name (uses provider default if not set). |
|
|
101
|
+
| `--meta-graph {auto,skip,reset}` | `SQL2MG_META_POLICY` | Controls how stored meta graph data is used (default `auto`). |
|
|
102
|
+
| `--log-level LEVEL` | `SQL2MG_LOG_LEVEL` | Sets logging verbosity (`DEBUG`, `INFO`, etc.). |
|
|
103
|
+
|
|
104
|
+
## Configuration
|
|
105
|
+
|
|
106
|
+
Set up your environment variables in `.env`:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
# Select source database (mysql or postgresql)
|
|
110
|
+
SOURCE_DB_TYPE=postgresql
|
|
111
|
+
|
|
112
|
+
# PostgreSQL Database (used when SOURCE_DB_TYPE=postgresql)
|
|
113
|
+
POSTGRES_HOST=localhost
|
|
114
|
+
POSTGRES_PORT=5432
|
|
115
|
+
POSTGRES_DATABASE=pagila
|
|
116
|
+
POSTGRES_USER=username
|
|
117
|
+
POSTGRES_PASSWORD=password
|
|
118
|
+
POSTGRES_SCHEMA=public
|
|
119
|
+
|
|
120
|
+
# MySQL Database (used when SOURCE_DB_TYPE=mysql)
|
|
121
|
+
MYSQL_HOST=localhost
|
|
122
|
+
MYSQL_PORT=3306
|
|
123
|
+
MYSQL_DATABASE=sakila
|
|
124
|
+
MYSQL_USER=username
|
|
125
|
+
MYSQL_PASSWORD=password
|
|
126
|
+
|
|
127
|
+
# Memgraph Database
|
|
128
|
+
MEMGRAPH_URL=bolt://localhost:7687
|
|
129
|
+
MEMGRAPH_USERNAME=
|
|
130
|
+
MEMGRAPH_PASSWORD=
|
|
131
|
+
MEMGRAPH_DATABASE=memgraph
|
|
132
|
+
|
|
133
|
+
# LLM API Keys (for LLM-powered features - choose one or more)
|
|
134
|
+
OPENAI_API_KEY=your_openai_key # For GPT models
|
|
135
|
+
# ANTHROPIC_API_KEY=your_anthropic_key # For Claude models
|
|
136
|
+
# GOOGLE_API_KEY=your_google_key # For Gemini models
|
|
137
|
+
|
|
138
|
+
# LLM Provider Configuration (optional - auto-detects if not set)
|
|
139
|
+
# LLM_PROVIDER=openai # Options: openai, anthropic, gemini
|
|
140
|
+
# LLM_MODEL=gpt-4o-mini # Specific model name
|
|
141
|
+
|
|
142
|
+
# Optional migration defaults (override CLI prompts)
|
|
143
|
+
SQL2MG_MODE=automatic
|
|
144
|
+
SQL2MG_STRATEGY=deterministic
|
|
145
|
+
SQL2MG_META_POLICY=auto
|
|
146
|
+
SQL2MG_LOG_LEVEL=INFO
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
When switching `SOURCE_DB_TYPE` remember to update the matching credential block and rerun `uv sync` so dependencies like `psycopg2-binary` are installed for PostgreSQL support.
|
|
150
|
+
|
|
151
|
+
Make sure that Memgraph is started with the `--schema-info-enabled=true` flag, since the agent uses the schema information from Memgraph's `SHOW SCHEMA INFO`.
|
|
152
|
+
|
|
153
|
+
## Multi-LLM Provider Support
|
|
154
|
+
|
|
155
|
+
The agent supports multiple LLM providers for AI-powered graph modeling:
|
|
156
|
+
|
|
157
|
+
### Supported Providers
|
|
158
|
+
|
|
159
|
+
- **OpenAI** (GPT models) - Default: `gpt-4o-mini`
|
|
160
|
+
- **Anthropic** (Claude models) - Default: `claude-3-5-sonnet-20241022`
|
|
161
|
+
- **Google** (Gemini models) - Default: `gemini-1.5-pro`
|
|
162
|
+
|
|
163
|
+
### Usage Examples
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# Auto-detect provider based on API keys
|
|
167
|
+
uv run main --strategy llm
|
|
168
|
+
|
|
169
|
+
# Use specific provider
|
|
170
|
+
uv run main --strategy llm --provider anthropic
|
|
171
|
+
|
|
172
|
+
# Use specific model
|
|
173
|
+
uv run main --strategy llm --provider openai --model gpt-4o
|
|
174
|
+
|
|
175
|
+
# All options together
|
|
176
|
+
uv run main --mode incremental --strategy llm --provider gemini --model gemini-1.5-flash
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
All providers support **structured outputs** for consistent graph model generation. The system automatically validates schemas using Pydantic models.
|
|
180
|
+
|
|
181
|
+
📖 **[Full Multi-Provider Documentation](docs/MULTI_PROVIDER_SUPPORT.md)**
|
|
182
|
+
|
|
183
|
+
# Architecture
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
core/hygm/
|
|
187
|
+
├── hygm.py # Main orchestrator class
|
|
188
|
+
├── models/ # Data models and structures
|
|
189
|
+
│ ├── graph_models.py # Core graph representation
|
|
190
|
+
│ ├── llm_models.py # LLM-specific models
|
|
191
|
+
│ ├── operations.py # Interactive operations
|
|
192
|
+
│ └── sources.py # Source tracking
|
|
193
|
+
└── strategies/ # Modeling strategies
|
|
194
|
+
├── base.py # Abstract interface
|
|
195
|
+
├── deterministic.py # Rule-based modeling
|
|
196
|
+
└── llm.py # AI-powered modeling
|
|
197
|
+
```
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
core/__init__.py,sha256=H6KdK_X5BwtN59bPkA58KTdIS2mGTxYz_0Dzd_Bu3M4,509
|
|
2
|
+
core/migration_agent.py,sha256=mN2mAllkvp9Pv04hlK21YAC0XQy73uuxV3Nq1-M6qpw,53428
|
|
3
|
+
core/hygm/__init__.py,sha256=SAHaWatqR5bp5eYHJBgOGWrYtkLmXvS3T6rUMNtZv1E,1809
|
|
4
|
+
core/hygm/hygm.py,sha256=4SgL1qRHg9FuGP9_pUVjUOG1iiXgA9q-_DqpUJ-8aok,93958
|
|
5
|
+
core/hygm/models/__init__.py,sha256=1CE9wQt-Df_RJZb4wgZKInFdpysqbDD1OVNubaPIdWU,1848
|
|
6
|
+
core/hygm/models/graph_models.py,sha256=pvBaqYxFIDZWpEu-wt7kQO8ClOUBq2d3JNwofcxGTyE,24268
|
|
7
|
+
core/hygm/models/llm_models.py,sha256=pKjxanUmX2O6-JPbmTkzD5K22YlvggYF-Zcy86J0w4A,8022
|
|
8
|
+
core/hygm/models/operations.py,sha256=MVgywKczqk22NC94TIbKcIJYyD4b6KdDfXOFf41KxMU,6039
|
|
9
|
+
core/hygm/models/sources.py,sha256=bJx-ldfgKsg8JUSzTODkUWL4D3uJLPubGxmIaC3Xjl4,2174
|
|
10
|
+
core/hygm/models/user_operations.py,sha256=13vvOq4HdwqoZEWw2FTdyzT6NMtXp8gXd4nFjLMwauk,6333
|
|
11
|
+
core/hygm/strategies/__init__.py,sha256=JVerSHnI_tMPLE_IJv2QAIGrPSFKhDAjyaev5EBDITw,464
|
|
12
|
+
core/hygm/strategies/base.py,sha256=WmIp0BOvJmYmh8-aSWFRS1i38JZQkcwvUK6bhtz0EEk,973
|
|
13
|
+
core/hygm/strategies/deterministic.py,sha256=cxFrDqMY6ka_yRK-u4SLxFqRT_sA3TuRm1G1ty4dHnE,10035
|
|
14
|
+
core/hygm/strategies/llm.py,sha256=s0_1c8IgbU7otAOSo2hLUpw3xJxeDubbEoB1gBegfc4,36439
|
|
15
|
+
core/hygm/validation/__init__.py,sha256=myoFL6jqLn2qyNHb-0ME2m9TkZroMp6ERp3g11yAiNY,957
|
|
16
|
+
core/hygm/validation/base.py,sha256=3IxFtgAwDu30wYvsuX2Ykvp39-KCkz3N_8DPs9u42Uw,5687
|
|
17
|
+
core/hygm/validation/graph_schema_validator.py,sha256=QIWzUBuo-v2eybSVA7BWjsc3BO_-m0snSrhVmjg-hPI,28895
|
|
18
|
+
core/hygm/validation/memgraph_data_validator.py,sha256=QeuCRxjB1IP5KyfkMaiX0Xy7-vi98mLWezV1EQQFOiQ,39290
|
|
19
|
+
core/schema/spec.json,sha256=Om_CWtAY4yWq9M_CfpZj2T1ubr1Yt1XlpKncshc6yNw,5411
|
|
20
|
+
core/utils/meta_graph.py,sha256=r4eSfqjonKtY-CIaB9h9UIVzmarWloge9EmlZrsxAYU,3683
|
|
21
|
+
database/__init__.py,sha256=_39-82CK_BoqZ___NHCfNKMBol5tYvzRm9JObvzPB0g,729
|
|
22
|
+
database/analyzer.py,sha256=KPIn0qW9sgQPtBEwgoL9hbJGjdA8Uz5LUMUyLQ4ezrA,12492
|
|
23
|
+
database/factory.py,sha256=TCcZQFJ427qzp6XeQL-07nnWBarOUn2rMZsdPkj4FUo,7313
|
|
24
|
+
database/models.py,sha256=sEFRNXdzvfRLIO4QYbkiCOF1efYmfB3hb10dfZT90eU,6289
|
|
25
|
+
database/adapters/__init__.py,sha256=XBPy5A_91XfkcJMvmhl8sYPIXEQHGJQQ3x5da9t05Vw,212
|
|
26
|
+
database/adapters/memgraph.py,sha256=KWEep0Q5zkGLXeKg1Q-mthPdzsWKPeN9DqOCU1bOTDc,10132
|
|
27
|
+
database/adapters/mysql.py,sha256=3BNp_4ffbfIbVUIWWURbpYQzKTjLfOf-iqfZle0qt4E,10512
|
|
28
|
+
database/adapters/postgresql.py,sha256=LzI_RaLiiGtSN-rLWPTBT-Okf-OvtArI758MAJunQ2E,11264
|
|
29
|
+
query_generation/__init__.py,sha256=dcJD3LqI55LrtoHyfruM0D5xrGmOD7zvyfbTOMMxrs0,469
|
|
30
|
+
query_generation/cypher_generator.py,sha256=vQSkv0XfMBMS7owxvoLeDVfvXoZ6MI0tbIllcGK378k,4640
|
|
31
|
+
query_generation/schema_utilities.py,sha256=W_p_V5VNmVFbegNnFwrIi0G7v7WRt9xHA5P5BeXxNTQ,2550
|
|
32
|
+
utils/__init__.py,sha256=6FW-aebS9wuwWR6izR8uvKFfmmK7RGA6iixnNdNdMGc,1558
|
|
33
|
+
utils/config.py,sha256=pGyQ-sESg1HtpHIWEus_xYG9KEqatnWL7bZqjUEGMCI,8218
|
|
34
|
+
utils/environment.py,sha256=UtM1HkQe5znFVaEzXtpNK01GHy1J0cjHzidyoG4EmgE,13798
|
|
35
|
+
__init__.py,sha256=PbyEy0zP99byR1LTjH0ImWtJ0S5M2ZyTuUjp0i-KKKw,1314
|
|
36
|
+
main.py,sha256=EnUw0cEAxezlo9hJUnUlm4xey0M8kdVzbmm9WJYWugg,17042
|
|
37
|
+
structured2graph-0.1.1.dist-info/METADATA,sha256=Ai50jsvZ16Sl5nqg8qMPnICfxzKyzjQcKsqOvrE9_KY,7453
|
|
38
|
+
structured2graph-0.1.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
39
|
+
structured2graph-0.1.1.dist-info/entry_points.txt,sha256=kdLlrnE_KtaBIqsKIj0oSS_qwEiO9eDYLcGD3JmOrBs,47
|
|
40
|
+
structured2graph-0.1.1.dist-info/licenses/LICENSE,sha256=ZzJm0xDxij_hPJDfDVZ2SYNYaIN-e1CA3TnB94I09Rs,1070
|
|
41
|
+
structured2graph-0.1.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Memgraph Ltd.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utilities package for the SQL to graph migration agent.
|
|
3
|
+
|
|
4
|
+
This package contains reusable utility modules for environment management,
|
|
5
|
+
database probing, configuration management, and other common functionality.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .environment import (
|
|
9
|
+
MigrationEnvironmentError,
|
|
10
|
+
DatabaseConnectionError,
|
|
11
|
+
load_environment,
|
|
12
|
+
get_required_environment_variables,
|
|
13
|
+
get_optional_environment_variables,
|
|
14
|
+
validate_environment_variables,
|
|
15
|
+
get_source_db_config,
|
|
16
|
+
get_memgraph_config,
|
|
17
|
+
probe_source_connection,
|
|
18
|
+
probe_memgraph_connection,
|
|
19
|
+
validate_llm_providers,
|
|
20
|
+
setup_and_validate_environment,
|
|
21
|
+
probe_all_connections,
|
|
22
|
+
print_environment_help,
|
|
23
|
+
print_troubleshooting_help,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from .config import (
|
|
27
|
+
MigrationConfig,
|
|
28
|
+
get_preset_config,
|
|
29
|
+
merge_config_with_preset,
|
|
30
|
+
print_config_summary,
|
|
31
|
+
get_available_presets,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Public API of the utils package: `from utils import *` exposes exactly
# these names, re-exported from .environment and .config above.
__all__ = [
    # Environment utilities
    "MigrationEnvironmentError",
    "DatabaseConnectionError",
    "load_environment",
    "get_required_environment_variables",
    "get_optional_environment_variables",
    "validate_environment_variables",
    "get_source_db_config",
    "get_memgraph_config",
    "probe_source_connection",
    "probe_memgraph_connection",
    "validate_llm_providers",
    "setup_and_validate_environment",
    "probe_all_connections",
    "print_environment_help",
    "print_troubleshooting_help",
    # Configuration utilities
    "MigrationConfig",
    "get_preset_config",
    "merge_config_with_preset",
    "print_config_summary",
    "get_available_presets",
]
|
utils/config.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration management utilities for the migration agent.
|
|
3
|
+
|
|
4
|
+
This module handles configuration parsing, validation, and default settings
|
|
5
|
+
for different environments and use cases.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from typing import Dict, Any, Optional
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
from .environment import get_source_db_type
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class MigrationConfig:
    """Configuration container for migration settings.

    Holds source-database, Memgraph, and OpenAI credentials plus a couple
    of migration behavior switches. Field order matters: it defines the
    positional ``__init__`` signature generated by ``@dataclass``.
    """

    # Source database settings
    source_db_type: str
    source_db_host: str
    source_db_user: str
    source_db_password: str
    source_db_database: str
    source_db_port: int
    source_db_schema: Optional[str]

    # Memgraph settings
    memgraph_url: str
    memgraph_username: str
    memgraph_password: str
    memgraph_database: str

    # OpenAI settings
    openai_api_key: str

    # Migration settings
    relationship_naming_strategy: str = "table_based"
    interactive_table_selection: bool = True

    @classmethod
    def from_environment(cls) -> "MigrationConfig":
        """Build a configuration from environment variables.

        The source-database block is selected by ``get_source_db_type()``
        (from .environment); anything that is not ``postgresql`` falls back
        to the MySQL variable set.
        """
        db_type = get_source_db_type()
        if db_type == "postgresql":
            host = os.getenv("POSTGRES_HOST", "localhost")
            user = os.getenv("POSTGRES_USER", "postgres")
            password = os.getenv("POSTGRES_PASSWORD", "")
            database = os.getenv("POSTGRES_DATABASE", "postgres")
            port = int(os.getenv("POSTGRES_PORT", "5432"))
            schema: Optional[str] = os.getenv("POSTGRES_SCHEMA", "public")
        else:
            host = os.getenv("MYSQL_HOST", "host.docker.internal")
            user = os.getenv("MYSQL_USER", "root")
            password = os.getenv("MYSQL_PASSWORD", "")
            database = os.getenv("MYSQL_DATABASE", "sakila")
            port = int(os.getenv("MYSQL_PORT", "3306"))
            schema = None  # schemas are a PostgreSQL-only concept here

        interactive_raw = os.getenv("INTERACTIVE_TABLE_SELECTION", "true")
        return cls(
            source_db_type=db_type,
            source_db_host=host,
            source_db_user=user,
            source_db_password=password,
            source_db_database=database,
            source_db_port=port,
            source_db_schema=schema,
            memgraph_url=os.getenv("MEMGRAPH_URL", "bolt://localhost:7687"),
            memgraph_username=os.getenv("MEMGRAPH_USERNAME", ""),
            memgraph_password=os.getenv("MEMGRAPH_PASSWORD", ""),
            memgraph_database=os.getenv("MEMGRAPH_DATABASE", "memgraph"),
            openai_api_key=os.getenv("OPENAI_API_KEY", ""),
            relationship_naming_strategy=os.getenv(
                "RELATIONSHIP_NAMING_STRATEGY", "table_based"
            ),
            interactive_table_selection=interactive_raw.lower() == "true",
        )

    def to_source_db_config(self) -> Dict[str, Any]:
        """Convert to a dictionary suitable for analyzer creation.

        Includes a ``schema`` key only for PostgreSQL with a schema set.
        """
        result: Dict[str, Any] = {
            "database_type": self.source_db_type,
            "host": self.source_db_host,
            "user": self.source_db_user,
            "password": self.source_db_password,
            "database": self.source_db_database,
            "port": self.source_db_port,
        }
        if self.source_db_type == "postgresql" and self.source_db_schema:
            result["schema"] = self.source_db_schema
        return result

    def to_memgraph_config(self) -> Dict[str, str]:
        """Convert to a Memgraph connection configuration dictionary."""
        return {
            "url": self.memgraph_url,
            "username": self.memgraph_username,
            "password": self.memgraph_password,
            "database": self.memgraph_database,
        }

    def validate(self) -> tuple[bool, list[str]]:
        """Validate the configuration.

        NOTE(review): an OpenAI key is always required here even though the
        README advertises Anthropic/Gemini support — confirm intentional.

        Returns:
            Tuple of (is_valid, validation_errors).
        """
        errors: list[str] = []

        if not self.openai_api_key:
            errors.append("OPENAI_API_KEY is required")

        if not 1 <= self.source_db_port <= 65535:
            db_label = (
                "PostgreSQL" if self.source_db_type == "postgresql" else "MySQL"
            )
            errors.append(f"Invalid {db_label} port: {self.source_db_port}")

        valid_strategies = ["table_based", "llm"]
        if self.relationship_naming_strategy not in valid_strategies:
            errors.append(
                "Invalid relationship_naming_strategy: "
                f"{self.relationship_naming_strategy}. Must be one of: "
                f"{valid_strategies}"
            )

        return not errors, errors
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def get_preset_config(preset_name: str) -> Optional[Dict[str, Any]]:
    """Look up a preset configuration for a common scenario.

    Args:
        preset_name: Name of the preset configuration.

    Returns:
        Dictionary with preset configuration values, or ``None`` when no
        preset with that name exists.
    """
    known_presets: Dict[str, Dict[str, Any]] = {
        "local_development": {
            "source_db_type": "mysql",
            "source_db_host": "localhost",
            "source_db_port": 3306,
            "source_db_user": "root",
            "source_db_database": "sakila",
            "memgraph_url": "bolt://localhost:7687",
            "relationship_naming_strategy": "table_based",
            "interactive_table_selection": True,
        },
        "docker_development": {
            "source_db_type": "mysql",
            "source_db_host": "host.docker.internal",
            "source_db_port": 3306,
            "source_db_user": "root",
            "source_db_database": "sakila",
            "memgraph_url": "bolt://localhost:7687",
            "relationship_naming_strategy": "table_based",
            "interactive_table_selection": True,
        },
        # Note: the production preset intentionally omits the database name
        # and password; those must come from the environment.
        "production": {
            "source_db_type": "mysql",
            "source_db_host": "mysql-server",
            "source_db_port": 3306,
            "source_db_user": "migration_user",
            "memgraph_url": "bolt://memgraph-server:7687",
            "relationship_naming_strategy": "llm",
            "interactive_table_selection": False,
        },
    }
    return known_presets.get(preset_name)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def merge_config_with_preset(
    config: MigrationConfig, preset_name: str
) -> MigrationConfig:
    """Merge a configuration with a preset, keeping existing values.

    A preset value only replaces the current one when the current value is
    "not meaningfully set": falsy, or one of the two well-known fallback
    defaults (the Docker host and the local Memgraph URL).

    Args:
        config: Existing configuration.
        preset_name: Name of the preset to merge.

    Returns:
        New configuration with preset values applied where not set.
    """
    preset = get_preset_config(preset_name)
    if not preset:
        # Unknown preset: hand back the original object untouched.
        return config

    merged = dict(config.__dict__)

    # Per-field fallback defaults that a preset is allowed to override.
    overridable_defaults = {
        "source_db_host": "host.docker.internal",
        "memgraph_url": "bolt://localhost:7687",
    }

    for key, preset_value in preset.items():
        if key not in merged:
            continue
        current = merged[key]
        if not current or overridable_defaults.get(key) == current:
            merged[key] = preset_value

    return MigrationConfig(**merged)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def print_config_summary(config: MigrationConfig) -> None:
    """Print a human-readable summary of *config* to stdout."""
    print("🔧 Configuration Summary:")
    print("-" * 30)
    print(
        f"Source DB: {config.source_db_type}://"
        f"{config.source_db_user}@{config.source_db_host}:"
        f"{config.source_db_port}"
    )
    print(f"Database: {config.source_db_database}")
    # The schema line is PostgreSQL-specific and only shown when set.
    if config.source_db_type == "postgresql" and config.source_db_schema:
        print(f"Schema: {config.source_db_schema}")
    print(f"Memgraph: {config.memgraph_url}")
    print(f"Strategy: {config.relationship_naming_strategy}")
    print(f"Interactive: {config.interactive_table_selection}")
    print(f"OpenAI API: {'✅ Set' if config.openai_api_key else '❌ Missing'}")
    print()
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_available_presets() -> list[str]:
    """Return the names of all built-in preset configurations.

    NOTE(review): keep this list in sync with the preset table inside
    ``get_preset_config``.
    """
    preset_names = ("local_development", "docker_development", "production")
    return list(preset_names)
|