structured2graph 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. __init__.py +47 -0
  2. core/__init__.py +23 -0
  3. core/hygm/__init__.py +74 -0
  4. core/hygm/hygm.py +2351 -0
  5. core/hygm/models/__init__.py +82 -0
  6. core/hygm/models/graph_models.py +667 -0
  7. core/hygm/models/llm_models.py +229 -0
  8. core/hygm/models/operations.py +176 -0
  9. core/hygm/models/sources.py +68 -0
  10. core/hygm/models/user_operations.py +139 -0
  11. core/hygm/strategies/__init__.py +17 -0
  12. core/hygm/strategies/base.py +36 -0
  13. core/hygm/strategies/deterministic.py +262 -0
  14. core/hygm/strategies/llm.py +904 -0
  15. core/hygm/validation/__init__.py +38 -0
  16. core/hygm/validation/base.py +194 -0
  17. core/hygm/validation/graph_schema_validator.py +687 -0
  18. core/hygm/validation/memgraph_data_validator.py +991 -0
  19. core/migration_agent.py +1369 -0
  20. core/schema/spec.json +155 -0
  21. core/utils/meta_graph.py +108 -0
  22. database/__init__.py +36 -0
  23. database/adapters/__init__.py +11 -0
  24. database/adapters/memgraph.py +318 -0
  25. database/adapters/mysql.py +311 -0
  26. database/adapters/postgresql.py +335 -0
  27. database/analyzer.py +396 -0
  28. database/factory.py +219 -0
  29. database/models.py +209 -0
  30. main.py +518 -0
  31. query_generation/__init__.py +20 -0
  32. query_generation/cypher_generator.py +129 -0
  33. query_generation/schema_utilities.py +88 -0
  34. structured2graph-0.1.1.dist-info/METADATA +197 -0
  35. structured2graph-0.1.1.dist-info/RECORD +41 -0
  36. structured2graph-0.1.1.dist-info/WHEEL +4 -0
  37. structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
  38. structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
  39. utils/__init__.py +57 -0
  40. utils/config.py +235 -0
  41. utils/environment.py +404 -0
@@ -0,0 +1,88 @@
1
+ """
2
+ Schema transformation utilities for SQL to graph migration.
3
+ Provides utilities for transforming SQL schema elements to graph equivalents.
4
+ """
5
+
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class SchemaUtilities:
12
+ """Utilities for schema transformation and naming conventions."""
13
+
14
+ @staticmethod
15
+ def table_name_to_label(table_name: str) -> str:
16
+ """Convert table name to Cypher node label.
17
+
18
+ Args:
19
+ table_name: SQL table name (e.g., 'user_profiles')
20
+
21
+ Returns:
22
+ Cypher label in PascalCase (e.g., 'UserProfiles')
23
+ """
24
+ return "".join(word.capitalize() for word in table_name.split("_"))
25
+
26
+ @staticmethod
27
+ def column_name_to_property(column_name: str) -> str:
28
+ """Convert column name to graph property name.
29
+
30
+ Args:
31
+ column_name: SQL column name
32
+
33
+ Returns:
34
+ Property name (currently unchanged, but extensible)
35
+ """
36
+ return column_name
37
+
38
+ @staticmethod
39
+ def generate_relationship_name(to_table: str, join_table: str | None = None) -> str:
40
+ """Generate semantic relationship name.
41
+
42
+ Args:
43
+ to_table: Target table name
44
+ join_table: Join table name for many-to-many relationships
45
+
46
+ Returns:
47
+ Relationship name in UPPER_CASE
48
+ """
49
+ if join_table:
50
+ return SchemaUtilities.table_name_to_label(join_table).upper()
51
+ else:
52
+ to_label = SchemaUtilities.table_name_to_label(to_table).upper()
53
+ return f"HAS_{to_label}"
54
+
55
+ @staticmethod
56
+ def is_metadata_column(column_name: str) -> bool:
57
+ """Check if a column is a metadata/system column.
58
+
59
+ Args:
60
+ column_name: Name of the column to check
61
+
62
+ Returns:
63
+ True if it's a metadata column, False otherwise
64
+ """
65
+ metadata_columns = {
66
+ "id",
67
+ "created_at",
68
+ "updated_at",
69
+ "created_on",
70
+ "updated_on",
71
+ "timestamp",
72
+ "version",
73
+ "deleted_at",
74
+ "modified_at",
75
+ }
76
+ return column_name.lower() in metadata_columns
77
+
78
+ @staticmethod
79
+ def is_foreign_key_column(column_name: str) -> bool:
80
+ """Check if a column name suggests it's a foreign key.
81
+
82
+ Args:
83
+ column_name: Name of the column to check
84
+
85
+ Returns:
86
+ True if it appears to be a foreign key, False otherwise
87
+ """
88
+ return column_name.lower().endswith("_id") and column_name.lower() != "id"
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: structured2graph
3
+ Version: 0.1.1
4
+ Summary: Database migration agent from structured data (e.g. SQL) to graph.
5
+ Project-URL: Homepage, https://github.com/memgraph/ai-toolkit
6
+ Project-URL: Repository, https://github.com/memgraph/ai-toolkit
7
+ Project-URL: Issues, https://github.com/memgraph/ai-toolkit/issues
8
+ Project-URL: Documentation, https://github.com/memgraph/ai-toolkit/tree/main/agents/sql2graph
9
+ Author-email: Memgraph <tech@memgraph.com>
10
+ Maintainer-email: Memgraph <tech@memgraph.com>
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Keywords: database-migration,etl,graph-database,knowledge-graph,memgraph,sql-to-graph
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: anthropic>=0.40.0
26
+ Requires-Dist: langchain-anthropic>=0.3.0
27
+ Requires-Dist: langchain-core>=1.0.0
28
+ Requires-Dist: langchain-google-genai>=2.0.0
29
+ Requires-Dist: langchain-openai>=0.2.0
30
+ Requires-Dist: langchain>=1.0.0
31
+ Requires-Dist: langgraph>=0.2.0
32
+ Requires-Dist: memgraph-toolbox>=0.1.4
33
+ Requires-Dist: mysql-connector-python>=9.0.0
34
+ Requires-Dist: neo4j>=5.0.0
35
+ Requires-Dist: openai>=1.0.0
36
+ Requires-Dist: psycopg2-binary>=2.9
37
+ Requires-Dist: pydantic>=2.0.0
38
+ Requires-Dist: pymysql>=1.1.0
39
+ Requires-Dist: python-dotenv>=1.0.0
40
+ Requires-Dist: sqlalchemy>=2.0.0
41
+ Description-Content-Type: text/markdown
42
+
43
+ # SQL Database to Graph Migration Agent
44
+
45
+ Intelligent database migration agent that transforms SQL databases (MySQL, PostgreSQL) into graph databases, powered by LLM analysis and LangGraph workflows.
46
+
47
+ ## Overview
48
+
49
+ This package provides a sophisticated migration agent that:
50
+
51
+ - **Analyzes SQL database schemas** - Automatically discovers tables, relationships, and constraints
52
+ - **Generates optimal graph models** - Uses AI to create node and relationship structures
53
+ - **Creates indexes and constraints** - Ensures performance and data integrity
54
+ - **Handles complex relationships** - Converts foreign keys to graph relationships
55
+ - **Incremental refinement** - Review each table, adjust the model
56
+ immediately, then enter the interactive refinement loop once all tables
57
+ are processed
58
+ - **Comprehensive validation** - Verifies migration results and data integrity
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ # Install the package
64
+ uv pip install .
65
+
66
+ # Or install in development mode
67
+ uv pip install -e .
68
+ ```
69
+
70
+ ## Quick Start
71
+
72
+ Run the migration agent:
73
+
74
+ ```bash
75
+ uv run main
76
+ ```
77
+
78
+ The agent will guide you through:
79
+
80
+ 1. Environment setup and database connections
81
+ 2. Graph modeling strategy selection
82
+ 3. Automatic or incremental migration mode
83
+ 4. Complete migration workflow with progress tracking
84
+
85
+ > **Incremental review:** The LLM now drafts the entire graph model in a single
86
+ > shot and then walks you through table-level changes detected since the last
87
+ > migration. You only need to approve (or tweak) the differences that matter.
88
+
89
+ You can also preconfigure the workflow using CLI flags or environment variables:
90
+
91
+ ```bash
92
+ uv run main --mode incremental --strategy llm --meta-graph reset --log-level DEBUG
93
+ ```
94
+
95
+ | Option | Environment | Description |
96
+ | -------------------------------------- | -------------------- | ------------------------------------------------------------- |
97
+ | `--mode {automatic,incremental}` | `SQL2MG_MODE` | Selects automatic or incremental modeling flow. |
98
+ | `--strategy {deterministic,llm}` | `SQL2MG_STRATEGY` | Chooses deterministic or LLM-powered HyGM strategy. |
99
+ | `--provider {openai,anthropic,gemini}` | `LLM_PROVIDER` | Selects LLM provider (auto-detects if not specified). |
100
+ | `--model MODEL_NAME` | `LLM_MODEL` | Specifies LLM model name (uses provider default if not set). |
101
+ | `--meta-graph {auto,skip,reset}` | `SQL2MG_META_POLICY` | Controls how stored meta graph data is used (default `auto`). |
102
+ | `--log-level LEVEL` | `SQL2MG_LOG_LEVEL` | Sets logging verbosity (`DEBUG`, `INFO`, etc.). |
103
+
104
+ ## Configuration
105
+
106
+ Set up your environment variables in `.env`:
107
+
108
+ ```bash
109
+ # Select source database (mysql or postgresql)
110
+ SOURCE_DB_TYPE=postgresql
111
+
112
+ # PostgreSQL Database (used when SOURCE_DB_TYPE=postgresql)
113
+ POSTGRES_HOST=localhost
114
+ POSTGRES_PORT=5432
115
+ POSTGRES_DATABASE=pagila
116
+ POSTGRES_USER=username
117
+ POSTGRES_PASSWORD=password
118
+ POSTGRES_SCHEMA=public
119
+
120
+ # MySQL Database (used when SOURCE_DB_TYPE=mysql)
121
+ MYSQL_HOST=localhost
122
+ MYSQL_PORT=3306
123
+ MYSQL_DATABASE=sakila
124
+ MYSQL_USER=username
125
+ MYSQL_PASSWORD=password
126
+
127
+ # Memgraph Database
128
+ MEMGRAPH_URL=bolt://localhost:7687
129
+ MEMGRAPH_USERNAME=
130
+ MEMGRAPH_PASSWORD=
131
+ MEMGRAPH_DATABASE=memgraph
132
+
133
+ # LLM API Keys (for LLM-powered features - choose one or more)
134
+ OPENAI_API_KEY=your_openai_key # For GPT models
135
+ # ANTHROPIC_API_KEY=your_anthropic_key # For Claude models
136
+ # GOOGLE_API_KEY=your_google_key # For Gemini models
137
+
138
+ # LLM Provider Configuration (optional - auto-detects if not set)
139
+ # LLM_PROVIDER=openai # Options: openai, anthropic, gemini
140
+ # LLM_MODEL=gpt-4o-mini # Specific model name
141
+
142
+ # Optional migration defaults (override CLI prompts)
143
+ SQL2MG_MODE=automatic
144
+ SQL2MG_STRATEGY=deterministic
145
+ SQL2MG_META_POLICY=auto
146
+ SQL2MG_LOG_LEVEL=INFO
147
+ ```
148
+
149
+ When switching `SOURCE_DB_TYPE` remember to update the matching credential block and rerun `uv sync` so dependencies like `psycopg2-binary` are installed for PostgreSQL support.
150
+
151
+ Make sure that Memgraph is started with the `--schema-info-enabled=true` flag, since the agent relies on the schema information returned by Memgraph's `SHOW SCHEMA INFO` query.
152
+
153
+ ## Multi-LLM Provider Support
154
+
155
+ The agent supports multiple LLM providers for AI-powered graph modeling:
156
+
157
+ ### Supported Providers
158
+
159
+ - **OpenAI** (GPT models) - Default: `gpt-4o-mini`
160
+ - **Anthropic** (Claude models) - Default: `claude-3-5-sonnet-20241022`
161
+ - **Google** (Gemini models) - Default: `gemini-1.5-pro`
162
+
163
+ ### Usage Examples
164
+
165
+ ```bash
166
+ # Auto-detect provider based on API keys
167
+ uv run main --strategy llm
168
+
169
+ # Use specific provider
170
+ uv run main --strategy llm --provider anthropic
171
+
172
+ # Use specific model
173
+ uv run main --strategy llm --provider openai --model gpt-4o
174
+
175
+ # All options together
176
+ uv run main --mode incremental --strategy llm --provider gemini --model gemini-1.5-flash
177
+ ```
178
+
179
+ All providers support **structured outputs** for consistent graph model generation. The system automatically validates schemas using Pydantic models.
180
+
181
+ 📖 **[Full Multi-Provider Documentation](docs/MULTI_PROVIDER_SUPPORT.md)**
182
+
183
+ ## Architecture
184
+
185
+ ```
186
+ core/hygm/
187
+ ├── hygm.py # Main orchestrator class
188
+ ├── models/ # Data models and structures
189
+ │ ├── graph_models.py # Core graph representation
190
+ │ ├── llm_models.py # LLM-specific models
191
+ │ ├── operations.py # Interactive operations
192
+ │ └── sources.py # Source tracking
193
+ └── strategies/ # Modeling strategies
194
+ ├── base.py # Abstract interface
195
+ ├── deterministic.py # Rule-based modeling
196
+ └── llm.py # AI-powered modeling
197
+ ```
@@ -0,0 +1,41 @@
1
+ core/__init__.py,sha256=H6KdK_X5BwtN59bPkA58KTdIS2mGTxYz_0Dzd_Bu3M4,509
2
+ core/migration_agent.py,sha256=mN2mAllkvp9Pv04hlK21YAC0XQy73uuxV3Nq1-M6qpw,53428
3
+ core/hygm/__init__.py,sha256=SAHaWatqR5bp5eYHJBgOGWrYtkLmXvS3T6rUMNtZv1E,1809
4
+ core/hygm/hygm.py,sha256=4SgL1qRHg9FuGP9_pUVjUOG1iiXgA9q-_DqpUJ-8aok,93958
5
+ core/hygm/models/__init__.py,sha256=1CE9wQt-Df_RJZb4wgZKInFdpysqbDD1OVNubaPIdWU,1848
6
+ core/hygm/models/graph_models.py,sha256=pvBaqYxFIDZWpEu-wt7kQO8ClOUBq2d3JNwofcxGTyE,24268
7
+ core/hygm/models/llm_models.py,sha256=pKjxanUmX2O6-JPbmTkzD5K22YlvggYF-Zcy86J0w4A,8022
8
+ core/hygm/models/operations.py,sha256=MVgywKczqk22NC94TIbKcIJYyD4b6KdDfXOFf41KxMU,6039
9
+ core/hygm/models/sources.py,sha256=bJx-ldfgKsg8JUSzTODkUWL4D3uJLPubGxmIaC3Xjl4,2174
10
+ core/hygm/models/user_operations.py,sha256=13vvOq4HdwqoZEWw2FTdyzT6NMtXp8gXd4nFjLMwauk,6333
11
+ core/hygm/strategies/__init__.py,sha256=JVerSHnI_tMPLE_IJv2QAIGrPSFKhDAjyaev5EBDITw,464
12
+ core/hygm/strategies/base.py,sha256=WmIp0BOvJmYmh8-aSWFRS1i38JZQkcwvUK6bhtz0EEk,973
13
+ core/hygm/strategies/deterministic.py,sha256=cxFrDqMY6ka_yRK-u4SLxFqRT_sA3TuRm1G1ty4dHnE,10035
14
+ core/hygm/strategies/llm.py,sha256=s0_1c8IgbU7otAOSo2hLUpw3xJxeDubbEoB1gBegfc4,36439
15
+ core/hygm/validation/__init__.py,sha256=myoFL6jqLn2qyNHb-0ME2m9TkZroMp6ERp3g11yAiNY,957
16
+ core/hygm/validation/base.py,sha256=3IxFtgAwDu30wYvsuX2Ykvp39-KCkz3N_8DPs9u42Uw,5687
17
+ core/hygm/validation/graph_schema_validator.py,sha256=QIWzUBuo-v2eybSVA7BWjsc3BO_-m0snSrhVmjg-hPI,28895
18
+ core/hygm/validation/memgraph_data_validator.py,sha256=QeuCRxjB1IP5KyfkMaiX0Xy7-vi98mLWezV1EQQFOiQ,39290
19
+ core/schema/spec.json,sha256=Om_CWtAY4yWq9M_CfpZj2T1ubr1Yt1XlpKncshc6yNw,5411
20
+ core/utils/meta_graph.py,sha256=r4eSfqjonKtY-CIaB9h9UIVzmarWloge9EmlZrsxAYU,3683
21
+ database/__init__.py,sha256=_39-82CK_BoqZ___NHCfNKMBol5tYvzRm9JObvzPB0g,729
22
+ database/analyzer.py,sha256=KPIn0qW9sgQPtBEwgoL9hbJGjdA8Uz5LUMUyLQ4ezrA,12492
23
+ database/factory.py,sha256=TCcZQFJ427qzp6XeQL-07nnWBarOUn2rMZsdPkj4FUo,7313
24
+ database/models.py,sha256=sEFRNXdzvfRLIO4QYbkiCOF1efYmfB3hb10dfZT90eU,6289
25
+ database/adapters/__init__.py,sha256=XBPy5A_91XfkcJMvmhl8sYPIXEQHGJQQ3x5da9t05Vw,212
26
+ database/adapters/memgraph.py,sha256=KWEep0Q5zkGLXeKg1Q-mthPdzsWKPeN9DqOCU1bOTDc,10132
27
+ database/adapters/mysql.py,sha256=3BNp_4ffbfIbVUIWWURbpYQzKTjLfOf-iqfZle0qt4E,10512
28
+ database/adapters/postgresql.py,sha256=LzI_RaLiiGtSN-rLWPTBT-Okf-OvtArI758MAJunQ2E,11264
29
+ query_generation/__init__.py,sha256=dcJD3LqI55LrtoHyfruM0D5xrGmOD7zvyfbTOMMxrs0,469
30
+ query_generation/cypher_generator.py,sha256=vQSkv0XfMBMS7owxvoLeDVfvXoZ6MI0tbIllcGK378k,4640
31
+ query_generation/schema_utilities.py,sha256=W_p_V5VNmVFbegNnFwrIi0G7v7WRt9xHA5P5BeXxNTQ,2550
32
+ utils/__init__.py,sha256=6FW-aebS9wuwWR6izR8uvKFfmmK7RGA6iixnNdNdMGc,1558
33
+ utils/config.py,sha256=pGyQ-sESg1HtpHIWEus_xYG9KEqatnWL7bZqjUEGMCI,8218
34
+ utils/environment.py,sha256=UtM1HkQe5znFVaEzXtpNK01GHy1J0cjHzidyoG4EmgE,13798
35
+ __init__.py,sha256=PbyEy0zP99byR1LTjH0ImWtJ0S5M2ZyTuUjp0i-KKKw,1314
36
+ main.py,sha256=EnUw0cEAxezlo9hJUnUlm4xey0M8kdVzbmm9WJYWugg,17042
37
+ structured2graph-0.1.1.dist-info/METADATA,sha256=Ai50jsvZ16Sl5nqg8qMPnICfxzKyzjQcKsqOvrE9_KY,7453
38
+ structured2graph-0.1.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
39
+ structured2graph-0.1.1.dist-info/entry_points.txt,sha256=kdLlrnE_KtaBIqsKIj0oSS_qwEiO9eDYLcGD3JmOrBs,47
40
+ structured2graph-0.1.1.dist-info/licenses/LICENSE,sha256=ZzJm0xDxij_hPJDfDVZ2SYNYaIN-e1CA3TnB94I09Rs,1070
41
+ structured2graph-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ structured2graph = main:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Memgraph Ltd.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
utils/__init__.py ADDED
@@ -0,0 +1,57 @@
1
+ """
2
+ Utilities package for the SQL to graph migration agent.
3
+
4
+ This package contains reusable utility modules for environment management,
5
+ database probing, configuration management, and other common functionality.
6
+ """
7
+
8
+ from .environment import (
9
+ MigrationEnvironmentError,
10
+ DatabaseConnectionError,
11
+ load_environment,
12
+ get_required_environment_variables,
13
+ get_optional_environment_variables,
14
+ validate_environment_variables,
15
+ get_source_db_config,
16
+ get_memgraph_config,
17
+ probe_source_connection,
18
+ probe_memgraph_connection,
19
+ validate_llm_providers,
20
+ setup_and_validate_environment,
21
+ probe_all_connections,
22
+ print_environment_help,
23
+ print_troubleshooting_help,
24
+ )
25
+
26
+ from .config import (
27
+ MigrationConfig,
28
+ get_preset_config,
29
+ merge_config_with_preset,
30
+ print_config_summary,
31
+ get_available_presets,
32
+ )
33
+
34
+ __all__ = [
35
+ # Environment utilities
36
+ "MigrationEnvironmentError",
37
+ "DatabaseConnectionError",
38
+ "load_environment",
39
+ "get_required_environment_variables",
40
+ "get_optional_environment_variables",
41
+ "validate_environment_variables",
42
+ "get_source_db_config",
43
+ "get_memgraph_config",
44
+ "probe_source_connection",
45
+ "probe_memgraph_connection",
46
+ "validate_llm_providers",
47
+ "setup_and_validate_environment",
48
+ "probe_all_connections",
49
+ "print_environment_help",
50
+ "print_troubleshooting_help",
51
+ # Configuration utilities
52
+ "MigrationConfig",
53
+ "get_preset_config",
54
+ "merge_config_with_preset",
55
+ "print_config_summary",
56
+ "get_available_presets",
57
+ ]
utils/config.py ADDED
@@ -0,0 +1,235 @@
1
+ """
2
+ Configuration management utilities for the migration agent.
3
+
4
+ This module handles configuration parsing, validation, and default settings
5
+ for different environments and use cases.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Any, Optional
10
+ from dataclasses import dataclass
11
+
12
+ from .environment import get_source_db_type
13
+
14
+
15
+ @dataclass
16
+ class MigrationConfig:
17
+ """Configuration class for migration settings."""
18
+
19
+ # Source database settings
20
+ source_db_type: str
21
+ source_db_host: str
22
+ source_db_user: str
23
+ source_db_password: str
24
+ source_db_database: str
25
+ source_db_port: int
26
+ source_db_schema: Optional[str]
27
+
28
+ # Memgraph settings
29
+ memgraph_url: str
30
+ memgraph_username: str
31
+ memgraph_password: str
32
+ memgraph_database: str
33
+
34
+ # OpenAI settings
35
+ openai_api_key: str
36
+
37
+ # Migration settings
38
+ relationship_naming_strategy: str = "table_based"
39
+ interactive_table_selection: bool = True
40
+
41
+ @classmethod
42
+ def from_environment(cls) -> "MigrationConfig":
43
+ """Create configuration from environment variables."""
44
+ db_type = get_source_db_type()
45
+ if db_type == "postgresql":
46
+ source_host = os.getenv("POSTGRES_HOST", "localhost")
47
+ source_user = os.getenv("POSTGRES_USER", "postgres")
48
+ source_password = os.getenv("POSTGRES_PASSWORD", "")
49
+ source_database = os.getenv("POSTGRES_DATABASE", "postgres")
50
+ source_port = int(os.getenv("POSTGRES_PORT", "5432"))
51
+ source_schema = os.getenv("POSTGRES_SCHEMA", "public")
52
+ else:
53
+ source_host = os.getenv("MYSQL_HOST", "host.docker.internal")
54
+ source_user = os.getenv("MYSQL_USER", "root")
55
+ source_password = os.getenv("MYSQL_PASSWORD", "")
56
+ source_database = os.getenv("MYSQL_DATABASE", "sakila")
57
+ source_port = int(os.getenv("MYSQL_PORT", "3306"))
58
+ source_schema = None
59
+
60
+ return cls(
61
+ source_db_type=db_type,
62
+ source_db_host=source_host,
63
+ source_db_user=source_user,
64
+ source_db_password=source_password,
65
+ source_db_database=source_database,
66
+ source_db_port=source_port,
67
+ source_db_schema=source_schema,
68
+ memgraph_url=os.getenv("MEMGRAPH_URL", "bolt://localhost:7687"),
69
+ memgraph_username=os.getenv("MEMGRAPH_USERNAME", ""),
70
+ memgraph_password=os.getenv("MEMGRAPH_PASSWORD", ""),
71
+ memgraph_database=os.getenv("MEMGRAPH_DATABASE", "memgraph"),
72
+ openai_api_key=os.getenv("OPENAI_API_KEY", ""),
73
+ relationship_naming_strategy=os.getenv(
74
+ "RELATIONSHIP_NAMING_STRATEGY", "table_based"
75
+ ),
76
+ interactive_table_selection=os.getenv(
77
+ "INTERACTIVE_TABLE_SELECTION", "true"
78
+ ).lower()
79
+ == "true",
80
+ )
81
+
82
+ def to_source_db_config(self) -> Dict[str, Any]:
83
+ """Convert to a dictionary suitable for analyzer creation."""
84
+ config: Dict[str, Any] = {
85
+ "database_type": self.source_db_type,
86
+ "host": self.source_db_host,
87
+ "user": self.source_db_user,
88
+ "password": self.source_db_password,
89
+ "database": self.source_db_database,
90
+ "port": self.source_db_port,
91
+ }
92
+ if self.source_db_type == "postgresql" and self.source_db_schema:
93
+ config["schema"] = self.source_db_schema
94
+ return config
95
+
96
+ def to_memgraph_config(self) -> Dict[str, str]:
97
+ """Convert to Memgraph configuration dictionary."""
98
+ return {
99
+ "url": self.memgraph_url,
100
+ "username": self.memgraph_username,
101
+ "password": self.memgraph_password,
102
+ "database": self.memgraph_database,
103
+ }
104
+
105
+ def validate(self) -> tuple[bool, list[str]]:
106
+ """
107
+ Validate the configuration.
108
+
109
+ Returns:
110
+ Tuple of (is_valid, validation_errors)
111
+ """
112
+ errors: list[str] = []
113
+
114
+ if not self.openai_api_key:
115
+ errors.append("OPENAI_API_KEY is required")
116
+
117
+ if self.source_db_type == "postgresql":
118
+ if not 1 <= self.source_db_port <= 65535:
119
+ errors.append(f"Invalid PostgreSQL port: {self.source_db_port}")
120
+ else:
121
+ if not 1 <= self.source_db_port <= 65535:
122
+ errors.append(f"Invalid MySQL port: {self.source_db_port}")
123
+
124
+ valid_strategies = ["table_based", "llm"]
125
+ if self.relationship_naming_strategy not in valid_strategies:
126
+ errors.append(
127
+ "Invalid relationship_naming_strategy: "
128
+ f"{self.relationship_naming_strategy}. Must be one of: "
129
+ f"{valid_strategies}"
130
+ )
131
+
132
+ return len(errors) == 0, errors
133
+
134
+
135
+ def get_preset_config(preset_name: str) -> Optional[Dict[str, Any]]:
136
+ """
137
+ Get a preset configuration for common scenarios.
138
+
139
+ Args:
140
+ preset_name: Name of the preset configuration
141
+
142
+ Returns:
143
+ Dictionary with preset configuration values or None if not found
144
+ """
145
+ presets = {
146
+ "local_development": {
147
+ "source_db_type": "mysql",
148
+ "source_db_host": "localhost",
149
+ "source_db_port": 3306,
150
+ "source_db_user": "root",
151
+ "source_db_database": "sakila",
152
+ "memgraph_url": "bolt://localhost:7687",
153
+ "relationship_naming_strategy": "table_based",
154
+ "interactive_table_selection": True,
155
+ },
156
+ "docker_development": {
157
+ "source_db_type": "mysql",
158
+ "source_db_host": "host.docker.internal",
159
+ "source_db_port": 3306,
160
+ "source_db_user": "root",
161
+ "source_db_database": "sakila",
162
+ "memgraph_url": "bolt://localhost:7687",
163
+ "relationship_naming_strategy": "table_based",
164
+ "interactive_table_selection": True,
165
+ },
166
+ "production": {
167
+ "source_db_type": "mysql",
168
+ "source_db_host": "mysql-server",
169
+ "source_db_port": 3306,
170
+ "source_db_user": "migration_user",
171
+ "memgraph_url": "bolt://memgraph-server:7687",
172
+ "relationship_naming_strategy": "llm",
173
+ "interactive_table_selection": False,
174
+ },
175
+ }
176
+
177
+ return presets.get(preset_name)
178
+
179
+
180
+ def merge_config_with_preset(
181
+ config: MigrationConfig, preset_name: str
182
+ ) -> MigrationConfig:
183
+ """
184
+ Merge configuration with a preset, keeping existing values.
185
+
186
+ Args:
187
+ config: Existing configuration
188
+ preset_name: Name of the preset to merge
189
+
190
+ Returns:
191
+ New configuration with preset values applied where not set
192
+ """
193
+ preset = get_preset_config(preset_name)
194
+ if not preset:
195
+ return config
196
+
197
+ config_dict = config.__dict__.copy()
198
+
199
+ for key, preset_value in preset.items():
200
+ if key not in config_dict:
201
+ continue
202
+ current_value = config_dict[key]
203
+ if (
204
+ not current_value
205
+ or (key == "source_db_host" and current_value == "host.docker.internal")
206
+ or (key == "memgraph_url" and current_value == "bolt://localhost:7687")
207
+ ):
208
+ config_dict[key] = preset_value
209
+
210
+ return MigrationConfig(**config_dict)
211
+
212
+
213
+ def print_config_summary(config: MigrationConfig) -> None:
214
+ """Print a summary of the configuration."""
215
+ print("🔧 Configuration Summary:")
216
+ print("-" * 30)
217
+ source_details = (
218
+ f"Source DB: {config.source_db_type}://"
219
+ f"{config.source_db_user}@{config.source_db_host}:"
220
+ f"{config.source_db_port}"
221
+ )
222
+ print(source_details)
223
+ print(f"Database: {config.source_db_database}")
224
+ if config.source_db_type == "postgresql" and config.source_db_schema:
225
+ print(f"Schema: {config.source_db_schema}")
226
+ print(f"Memgraph: {config.memgraph_url}")
227
+ print(f"Strategy: {config.relationship_naming_strategy}")
228
+ print(f"Interactive: {config.interactive_table_selection}")
229
+ print(f"OpenAI API: {'✅ Set' if config.openai_api_key else '❌ Missing'}")
230
+ print()
231
+
232
+
233
+ def get_available_presets() -> list[str]:
234
+ """Get a list of available preset names."""
235
+ return ["local_development", "docker_development", "production"]