structured2graph 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +47 -0
- core/__init__.py +23 -0
- core/hygm/__init__.py +74 -0
- core/hygm/hygm.py +2351 -0
- core/hygm/models/__init__.py +82 -0
- core/hygm/models/graph_models.py +667 -0
- core/hygm/models/llm_models.py +229 -0
- core/hygm/models/operations.py +176 -0
- core/hygm/models/sources.py +68 -0
- core/hygm/models/user_operations.py +139 -0
- core/hygm/strategies/__init__.py +17 -0
- core/hygm/strategies/base.py +36 -0
- core/hygm/strategies/deterministic.py +262 -0
- core/hygm/strategies/llm.py +904 -0
- core/hygm/validation/__init__.py +38 -0
- core/hygm/validation/base.py +194 -0
- core/hygm/validation/graph_schema_validator.py +687 -0
- core/hygm/validation/memgraph_data_validator.py +991 -0
- core/migration_agent.py +1369 -0
- core/schema/spec.json +155 -0
- core/utils/meta_graph.py +108 -0
- database/__init__.py +36 -0
- database/adapters/__init__.py +11 -0
- database/adapters/memgraph.py +318 -0
- database/adapters/mysql.py +311 -0
- database/adapters/postgresql.py +335 -0
- database/analyzer.py +396 -0
- database/factory.py +219 -0
- database/models.py +209 -0
- main.py +518 -0
- query_generation/__init__.py +20 -0
- query_generation/cypher_generator.py +129 -0
- query_generation/schema_utilities.py +88 -0
- structured2graph-0.1.1.dist-info/METADATA +197 -0
- structured2graph-0.1.1.dist-info/RECORD +41 -0
- structured2graph-0.1.1.dist-info/WHEEL +4 -0
- structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
- structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
- utils/__init__.py +57 -0
- utils/config.py +235 -0
- utils/environment.py +404 -0
main.py
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# flake8: noqa
|
|
3
|
+
"""
|
|
4
|
+
SQL Database to Graph Migration Agent - Main Entry Point
|
|
5
|
+
|
|
6
|
+
This is the main entry point for the SQL database to graph migration agent.
|
|
7
|
+
Run with: uv run main.py
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
from typing import Dict, Any, Optional
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
# Add current directory to Python path for absolute imports
|
|
18
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
|
19
|
+
|
|
20
|
+
from utils import ( # noqa: E402
|
|
21
|
+
MigrationEnvironmentError,
|
|
22
|
+
DatabaseConnectionError,
|
|
23
|
+
setup_and_validate_environment,
|
|
24
|
+
probe_all_connections,
|
|
25
|
+
print_environment_help,
|
|
26
|
+
print_troubleshooting_help,
|
|
27
|
+
)
|
|
28
|
+
from core import SQLToMemgraphAgent # noqa: E402
|
|
29
|
+
from core.hygm import GraphModelingStrategy, ModelingMode # noqa: E402
|
|
30
|
+
|
|
31
|
+
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

logger = logging.getLogger(__name__)

# Maps CLI/env mode strings to ModelingMode members.
MODE_CHOICES = {
    "automatic": ModelingMode.AUTOMATIC,
    "incremental": ModelingMode.INCREMENTAL,
}

# Maps CLI/env strategy strings to GraphModelingStrategy members.
# "llm_powered" is an accepted alias for "llm" (reachable via the
# SQL2MG_STRATEGY env var; the --strategy CLI flag only exposes
# "deterministic" and "llm").
STRATEGY_CHOICES = {
    "deterministic": GraphModelingStrategy.DETERMINISTIC,
    "llm": GraphModelingStrategy.LLM_POWERED,
    "llm_powered": GraphModelingStrategy.LLM_POWERED,
}

# Valid values for --meta-graph / SQL2MG_META_POLICY.
META_GRAPH_POLICIES = {"auto", "skip", "reset"}

# Valid values for --log-level / SQL2MG_LOG_LEVEL (standard logging levels).
LOG_LEVEL_CHOICES = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"]

# Valid values for --provider / LLM_PROVIDER.
PROVIDER_CHOICES = ["openai", "anthropic", "gemini"]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _lower_env(name: str) -> Optional[str]:
|
|
58
|
+
value = os.getenv(name)
|
|
59
|
+
return value.lower() if value else None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _upper_env(name: str) -> Optional[str]:
|
|
63
|
+
value = os.getenv(name)
|
|
64
|
+
return value.upper() if value else None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def parse_cli_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse command-line arguments for the migration agent.

    Environment variables (SQL2MG_MODE, SQL2MG_STRATEGY, LLM_PROVIDER,
    LLM_MODEL, SQL2MG_META_POLICY, SQL2MG_LOG_LEVEL) supply the defaults;
    an explicit flag on the command line overrides the corresponding
    variable.

    Args:
        argv: Argument list to parse; None means sys.argv[1:].

    Returns:
        argparse.Namespace with mode, strategy, provider, model,
        meta_graph, and log_level attributes.
    """
    parser = argparse.ArgumentParser(
        description="SQL database to graph migration agent",
    )

    parser.add_argument(
        "--mode",
        choices=sorted(MODE_CHOICES.keys()),
        default=_lower_env("SQL2MG_MODE"),
        type=str.lower,
        help="Graph modeling mode (automatic|incremental). Overrides SQL2MG_MODE.",
    )
    parser.add_argument(
        "--strategy",
        # Only the canonical spellings are offered on the CLI; the env var
        # may additionally carry the "llm_powered" alias (see STRATEGY_CHOICES).
        choices=["deterministic", "llm"],
        default=_lower_env("SQL2MG_STRATEGY"),
        type=str.lower,
        help="Graph modeling strategy (deterministic|llm). Overrides SQL2MG_STRATEGY.",
    )
    parser.add_argument(
        "--provider",
        choices=PROVIDER_CHOICES,
        default=_lower_env("LLM_PROVIDER"),
        type=str.lower,
        help=(
            "LLM provider (openai|anthropic|gemini). "
            "Overrides LLM_PROVIDER. Auto-detects if not specified."
        ),
    )
    parser.add_argument(
        "--model",
        # Model names are case-sensitive, so no lower-casing here.
        default=os.getenv("LLM_MODEL"),
        help=(
            "LLM model name. Overrides LLM_MODEL. "
            "Uses provider default if not specified."
        ),
    )
    parser.add_argument(
        "--meta-graph",
        choices=sorted(META_GRAPH_POLICIES),
        default=_lower_env("SQL2MG_META_POLICY"),
        type=str.lower,
        help=(
            "Meta graph policy: auto (default), skip stored metadata, or reset to "
            "ignore previous migrations. Overrides SQL2MG_META_POLICY."
        ),
    )
    parser.add_argument(
        "--log-level",
        choices=LOG_LEVEL_CHOICES,
        default=_upper_env("SQL2MG_LOG_LEVEL"),
        type=str.upper,
        help="Logging level for the agent. Overrides SQL2MG_LOG_LEVEL.",
    )

    return parser.parse_args(argv)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _configure_log_level(level_name: Optional[str]) -> None:
    """Apply *level_name* to the root logger, its handlers, and this module's logger.

    A falsy *level_name* is a no-op; an unknown name falls back to INFO
    after logging a warning.
    """
    if not level_name:
        return

    resolved = getattr(logging, level_name.upper(), None)
    if not isinstance(resolved, int):
        logger.warning("Unknown log level '%s'; falling back to INFO", level_name)
        resolved = logging.INFO

    root = logging.getLogger()
    root.setLevel(resolved)
    # Handlers keep their own thresholds, so lower them too.
    for handler in root.handlers:
        handler.setLevel(resolved)
    logger.setLevel(resolved)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _resolve_mode(cli_mode: Optional[str]) -> Optional[ModelingMode]:
    """Map a CLI/env mode string to a ModelingMode member.

    Args:
        cli_mode: Lower-cased mode string from --mode or SQL2MG_MODE,
            or None when neither was given.

    Returns:
        The matching ModelingMode, or None (caller falls back to an
        interactive prompt) when the string is missing or unknown.
    """
    if not cli_mode:
        return None
    resolved = MODE_CHOICES.get(cli_mode)
    # Compare against None explicitly: a truthiness test would misreport a
    # falsy enum member as "not found".
    if resolved is None:
        logger.warning("Unrecognised mode '%s'; falling back to prompt", cli_mode)
    return resolved
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _resolve_strategy(cli_strategy: Optional[str]) -> Optional[GraphModelingStrategy]:
    """Map a CLI/env strategy string to a GraphModelingStrategy member.

    Args:
        cli_strategy: Lower-cased strategy string from --strategy or
            SQL2MG_STRATEGY, or None when neither was given.

    Returns:
        The matching GraphModelingStrategy, or None (caller falls back to
        an interactive prompt) when the string is missing or unknown.
    """
    if not cli_strategy:
        return None
    resolved = STRATEGY_CHOICES.get(cli_strategy)
    # Compare against None explicitly: a truthiness test would misreport a
    # falsy enum member as "not found".
    if resolved is None:
        logger.warning(
            "Unrecognised strategy '%s'; falling back to prompt",
            cli_strategy,
        )
    return resolved
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def print_banner() -> None:
    """Print the application banner to stdout."""
    rule = "=" * 60
    for line in (
        rule,
        "🚀 SQL Database to Graph Migration Agent",
        rule,
        "Intelligent database migration with LLM-powered analysis",
        "",
    ):
        print(line)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def get_graph_modeling_mode() -> ModelingMode:
    """Interactively ask the user which graph modeling mode to use.

    Returns:
        ModelingMode: AUTOMATIC (also the default on empty input) or
        INCREMENTAL.
    """
    print("Graph modeling mode:")
    print()
    print(" 1. Automatic - Generate graph model without prompts")
    print()
    print(" 2. Incremental - Review each table with end-of-session refinement")
    print()

    # Empty input (plain Enter) defaults to automatic.
    selections = {
        "": ModelingMode.AUTOMATIC,
        "1": ModelingMode.AUTOMATIC,
        "2": ModelingMode.INCREMENTAL,
    }

    while True:
        try:
            answer = input("Select mode (1-2) or press Enter for automatic: ").strip()
        except ValueError:
            print("Invalid input. Please enter 1-2.")
            continue
        if answer in selections:
            return selections[answer]
        print("Invalid choice. Please select 1-2.")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def get_graph_modeling_strategy() -> GraphModelingStrategy:
    """Interactively ask the user which graph modeling strategy to use.

    Returns:
        GraphModelingStrategy: DETERMINISTIC (also the default on empty
        input) or LLM_POWERED.
    """
    print("Graph modeling strategy:")
    print()
    print(" 1. Deterministic - Rule-based graph model creation ")
    print()
    print(" 2. AI - LLM-based graph model creation (full HyGM capabilities)")
    print()
    print()

    # Empty input (plain Enter) defaults to deterministic.
    selections = {
        "": GraphModelingStrategy.DETERMINISTIC,
        "1": GraphModelingStrategy.DETERMINISTIC,
        "2": GraphModelingStrategy.LLM_POWERED,
    }

    while True:
        try:
            answer = input(
                "Select strategy (1-2) or press Enter for deterministic: "
            ).strip()
        except ValueError:
            print("Invalid input. Please enter 1-2.")
            continue
        if answer in selections:
            return selections[answer]
        print("Invalid choice. Please select 1-2.")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def run_migration(
    source_db_config: Dict[str, Any],
    memgraph_config: Dict[str, Any],
    modeling_mode: ModelingMode,
    graph_modeling_strategy: GraphModelingStrategy,
    meta_graph_policy: str,
    llm_provider: Optional[str] = None,
    llm_model: Optional[str] = None,
) -> Dict[str, Any]:
    """Create the migration agent and run the full migration workflow.

    Args:
        source_db_config: Source database connection configuration
        memgraph_config: Memgraph connection configuration
        modeling_mode: Graph modeling mode (automatic or incremental)
        graph_modeling_strategy: Strategy for graph model creation
        meta_graph_policy: Meta graph handling policy (auto|skip|reset)
        llm_provider: LLM provider (openai|anthropic|gemini)
        llm_model: Specific LLM model name

    Returns:
        Migration result dictionary from the agent.
    """
    print("🔧 Creating migration agent...")

    mode_name = (
        "incremental" if modeling_mode == ModelingMode.INCREMENTAL else "automatic"
    )
    strategy_name = graph_modeling_strategy.value
    print(f"🎯 Graph modeling: {mode_name} with {strategy_name} strategy")

    if llm_provider:
        print(f"🤖 LLM Provider: {llm_provider}")
    if llm_model:
        print(f"🎯 Model: {llm_model}")
    print()

    # The agent carries all modeling/LLM settings; the migrate() call below
    # only needs the two connection configs.
    agent = SQLToMemgraphAgent(
        modeling_mode=modeling_mode,
        graph_modeling_strategy=graph_modeling_strategy,
        meta_graph_policy=meta_graph_policy,
        llm_provider=llm_provider,
        llm_model=llm_model,
    )

    print("🚀 Starting migration workflow...")
    print("This will:")
    for step in (
        " 1. 🔍 Analyze source database schema",
        " 2. 🎯 Generate graph model with HyGM",
        " 3. 📝 Create indexes and constraints",
        " 4. ⚙️ Generate migration queries",
        " 5. 🔄 Execute migration to Memgraph",
        " 6. ✅ Verify the migration results",
    ):
        print(step)
    print()

    if modeling_mode == ModelingMode.INCREMENTAL:
        print("🔄 Incremental mode: Review LLM-generated graph changes table by table")
        print(" then approve or tweak differences before refining the model")
        print()

    return agent.migrate(source_db_config, memgraph_config)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def print_migration_results(result: Dict[str, Any]) -> None:
    """Print a formatted summary of a migration result.

    Args:
        result: Migration result dictionary. Keys read here: "success",
            "errors", "completed_tables", "total_tables", "final_step",
            and optionally "validation_report", "database_structure",
            "created_indexes", "created_constraints".
    """
    print("\n" + "=" * 60)
    print("📊 MIGRATION RESULTS")
    print("=" * 60)

    if result.get("success", False):
        print("✅ Migration completed successfully!")
    else:
        print("❌ Migration encountered errors")

    # Error details
    if result.get("errors"):
        print(f"\n🚨 Errors ({len(result['errors'])}):")
        for i, error in enumerate(result["errors"], 1):
            print(f" {i}. {error}")

    # Completion stats
    completed = len(result.get("completed_tables", []))
    total = result.get("total_tables", 0)
    print(f"\n📋 Tables processed: {completed}/{total}")

    validation_report = result.get("validation_report")
    if validation_report:
        _print_validation_section(validation_report)

    if result.get("database_structure"):
        _print_structure_section(result)

    print(f"\n🏁 Final status: {result.get('final_step', 'Unknown')}")
    print("=" * 60)


def _print_validation_section(validation_report: Dict[str, Any]) -> None:
    """Print post-migration validation status, score, metrics, and issues."""
    print("\n✅ Post-migration Validation:")
    if validation_report.get("success"):
        print(" 🎯 Status: PASSED")
    else:
        print(" ⚠️ Status: Issues found")

    validation_score = validation_report.get("validation_score", 0)
    print(f" 📊 Validation Score: {int(validation_score)}/100")

    # metrics is an object with coverage attributes (not a dict) —
    # produced by the validation layer.
    metrics = validation_report.get("metrics")
    if metrics:
        print(f" 📁 Tables: {metrics.tables_covered}/{metrics.tables_total}")
        print(
            f" 🏷️ Properties: {metrics.properties_covered}/{metrics.properties_total}"
        )
        print(
            f" 🔗 Relationships: {metrics.relationships_covered}/{metrics.relationships_total}"
        )
        print(f" 📇 Indexes: {metrics.indexes_covered}/{metrics.indexes_total}")
        print(
            f" 🔒 Constraints: {metrics.constraints_covered}/{metrics.constraints_total}"
        )

    issues = validation_report.get("issues", [])
    if issues:
        critical_count = sum(
            1 for issue in issues if issue.get("severity") == "CRITICAL"
        )
        warning_count = sum(
            1 for issue in issues if issue.get("severity") == "WARNING"
        )
        info_count = sum(1 for issue in issues if issue.get("severity") == "INFO")

        print(
            f" 🚨 Issues: {critical_count} critical, {warning_count} warnings, {info_count} info"
        )

        # Show at most three critical issues.
        critical_issues = [
            issue for issue in issues if issue.get("severity") == "CRITICAL"
        ]
        if critical_issues:
            print(" 📋 Top Critical Issues:")
            for issue in critical_issues[:3]:
                print(f" - {issue.get('message', 'Unknown issue')}")
    else:
        print(" ✅ No validation issues found")


def _print_structure_section(result: Dict[str, Any]) -> None:
    """Print schema analysis: table counts, excluded views, join tables, relationships."""
    structure = result["database_structure"]
    print("\n🔍 Schema Analysis:")
    print(f" 📁 Entity tables: {len(structure.get('entity_tables', {}))}")
    print(f" 🔗 Join tables: {len(structure.get('join_tables', {}))}")
    print(f" 👁️ Views (excluded): {len(structure.get('views', {}))}")
    print(f" 🔄 Relationships: {len(structure.get('relationships', []))}")

    # Index/constraint counts live on the result dict, not on the structure.
    if result.get("created_indexes") is not None:
        index_count = len(result.get("created_indexes", []))
        constraint_count = len(result.get("created_constraints", []))
        print(f" 📇 Created indexes: {index_count}")
        print(f" 🔒 Created constraints: {constraint_count}")

    if structure.get("views"):
        print("\n👁️ Excluded view tables:")
        for table_name, table_info in structure["views"].items():
            row_count = table_info.get("row_count", 0)
            print(f" - {table_name}: {row_count} rows")

    if structure.get("join_tables"):
        print("\n🔗 Detected join tables:")
        for table_name, table_info in structure["join_tables"].items():
            fk_count = len(table_info.get("foreign_keys", []))
            row_count = table_info.get("row_count", 0)
            print(f" - {table_name}: {fk_count} FKs, {row_count} rows")

    # Group relationships by their "type" field for the breakdown.
    relationships_by_type: Dict[str, list] = {}
    for rel in structure.get("relationships", []):
        relationships_by_type.setdefault(rel["type"], []).append(rel)

    if relationships_by_type:
        print("\n🔄 Relationship breakdown:")
        for rel_type, rels in relationships_by_type.items():
            print(f" - {rel_type}: {len(rels)} relationships")
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def main(argv: Optional[list[str]] = None) -> None:
    """Main entry point: parse args, validate environment, run migration.

    Exits with status 1 on environment/connection/unexpected errors and
    status 0 on user cancellation (Ctrl-C).
    """
    args = parse_cli_args(argv)
    _configure_log_level(args.log_level)
    print_banner()

    try:
        # Environment setup and validation
        print("🔧 Setting up environment...")
        source_db_config, memgraph_config = setup_and_validate_environment()
        print("✅ Environment validation completed")
        print()

        # Connectivity probes for both databases
        print("🔌 Testing database connections...")
        probe_all_connections(source_db_config, memgraph_config)
        print("✅ All connections verified")
        print()

        # CLI/env values win; otherwise fall back to interactive prompts.
        graph_mode = _resolve_mode(args.mode) or get_graph_modeling_mode()
        graph_strategy = (
            _resolve_strategy(args.strategy) or get_graph_modeling_strategy()
        )

        meta_graph_policy = (args.meta_graph or "auto").lower()
        if meta_graph_policy not in META_GRAPH_POLICIES:
            logger.warning(
                "Unrecognised meta graph policy '%s'; defaulting to auto",
                meta_graph_policy,
            )
            meta_graph_policy = "auto"

        result = run_migration(
            source_db_config,
            memgraph_config,
            graph_mode,
            graph_strategy,
            meta_graph_policy,
            llm_provider=args.provider,
            llm_model=args.model,
        )

        print_migration_results(result)

    except MigrationEnvironmentError as exc:
        print("\n❌ Environment Setup Error:")
        print(str(exc))
        print_environment_help()
        sys.exit(1)

    except DatabaseConnectionError as exc:
        print("\n❌ Database Connection Error:")
        print(str(exc))
        print_troubleshooting_help()
        sys.exit(1)

    except KeyboardInterrupt:
        print("\n\n⚠️ Migration cancelled by user")
        sys.exit(0)

    except Exception as exc:  # pylint: disable=broad-except
        # Top-level boundary: log with traceback, then exit non-zero.
        print(f"\n❌ Unexpected Error: {exc}")
        logger.error("Unexpected error in main: %s", exc, exc_info=True)
        print_troubleshooting_help()
        sys.exit(1)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
# Script entry point: run the CLI when this file is executed directly
# (e.g. `uv run main.py`, per the module docstring).
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Query generation and schema utilities.
|
|
3
|
+
|
|
4
|
+
This package provides utilities for generating Cypher queries
|
|
5
|
+
and handling schema transformations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Add agents root to path for absolute imports
|
|
12
|
+
sys.path.append(str(Path(__file__).parent.parent))
|
|
13
|
+
|
|
14
|
+
from query_generation.cypher_generator import CypherGenerator
|
|
15
|
+
from query_generation.schema_utilities import SchemaUtilities
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"CypherGenerator",
|
|
19
|
+
"SchemaUtilities",
|
|
20
|
+
]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cypher query generation utilities for SQL to graph migration.
|
|
3
|
+
Provides label naming, relationship naming, and index generation.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, List, Any, TYPE_CHECKING
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from core.hygm.models.graph_models import GraphIndex, GraphConstraint
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CypherGenerator:
|
|
16
|
+
"""Utilities for Cypher query generation in SQL to graph migration."""
|
|
17
|
+
|
|
18
|
+
def __init__(self):
|
|
19
|
+
"""Initialize the Cypher query generator."""
|
|
20
|
+
|
|
21
|
+
def generate_index_queries_from_hygm(
|
|
22
|
+
self, hygm_indexes: List["GraphIndex"]
|
|
23
|
+
) -> List[str]:
|
|
24
|
+
"""Generate index creation queries from HyGM graph model indexes."""
|
|
25
|
+
queries = []
|
|
26
|
+
|
|
27
|
+
for graph_index in hygm_indexes:
|
|
28
|
+
# Handle node indexes
|
|
29
|
+
if graph_index.labels:
|
|
30
|
+
label = graph_index.labels[0] # Use first label
|
|
31
|
+
for prop in graph_index.properties:
|
|
32
|
+
query = f"CREATE INDEX ON :{label}({prop})"
|
|
33
|
+
queries.append(query.strip())
|
|
34
|
+
|
|
35
|
+
# Handle edge indexes (if supported in future)
|
|
36
|
+
elif graph_index.edge_type:
|
|
37
|
+
# Edge indexes are not commonly used in current versions
|
|
38
|
+
# but we can add support here if needed
|
|
39
|
+
logger.info("Skipping edge index for %s", graph_index.edge_type)
|
|
40
|
+
|
|
41
|
+
return queries
|
|
42
|
+
|
|
43
|
+
def generate_constraint_queries_from_hygm(
|
|
44
|
+
self, hygm_constraints: List["GraphConstraint"]
|
|
45
|
+
) -> List[str]:
|
|
46
|
+
"""Generate constraint creation queries from HyGM graph model."""
|
|
47
|
+
queries = []
|
|
48
|
+
|
|
49
|
+
for graph_constraint in hygm_constraints:
|
|
50
|
+
# Handle node constraints
|
|
51
|
+
if graph_constraint.labels:
|
|
52
|
+
label = graph_constraint.labels[0] # Use first label
|
|
53
|
+
|
|
54
|
+
if graph_constraint.type == "unique":
|
|
55
|
+
for prop in graph_constraint.properties:
|
|
56
|
+
query = (
|
|
57
|
+
f"CREATE CONSTRAINT ON (n:{label}) "
|
|
58
|
+
f"ASSERT n.{prop} IS UNIQUE"
|
|
59
|
+
)
|
|
60
|
+
queries.append(query)
|
|
61
|
+
|
|
62
|
+
# Add support for other constraint types if needed
|
|
63
|
+
elif graph_constraint.type == "existence":
|
|
64
|
+
for prop in graph_constraint.properties:
|
|
65
|
+
query = (
|
|
66
|
+
f"CREATE CONSTRAINT ON (n:{label}) "
|
|
67
|
+
f"ASSERT exists(n.{prop})"
|
|
68
|
+
)
|
|
69
|
+
queries.append(query)
|
|
70
|
+
|
|
71
|
+
return queries
|
|
72
|
+
|
|
73
|
+
def generate_index_queries(
|
|
74
|
+
self, table_name: str, schema: List[Dict[str, Any]]
|
|
75
|
+
) -> List[str]:
|
|
76
|
+
"""Generate index creation queries."""
|
|
77
|
+
queries = []
|
|
78
|
+
label = self._table_name_to_label(table_name)
|
|
79
|
+
|
|
80
|
+
for col in schema:
|
|
81
|
+
if col["key"] in ["PRI", "UNI", "MUL"]:
|
|
82
|
+
query = f"CREATE INDEX ON :{label}({col['field']})"
|
|
83
|
+
queries.append(query.strip())
|
|
84
|
+
|
|
85
|
+
return queries
|
|
86
|
+
|
|
87
|
+
def generate_constraint_queries(
|
|
88
|
+
self, table_name: str, schema: List[Dict[str, Any]]
|
|
89
|
+
) -> List[str]:
|
|
90
|
+
"""Generate constraint creation queries."""
|
|
91
|
+
queries = []
|
|
92
|
+
label = self._table_name_to_label(table_name)
|
|
93
|
+
|
|
94
|
+
# Primary key constraints
|
|
95
|
+
primary_keys = [col["field"] for col in schema if col["key"] == "PRI"]
|
|
96
|
+
for pk in primary_keys:
|
|
97
|
+
query = f"CREATE CONSTRAINT ON (n:{label}) ASSERT n.{pk} IS UNIQUE"
|
|
98
|
+
queries.append(query)
|
|
99
|
+
|
|
100
|
+
# Unique constraints
|
|
101
|
+
unique_keys = [col["field"] for col in schema if col["key"] == "UNI"]
|
|
102
|
+
for uk in unique_keys:
|
|
103
|
+
query = f"CREATE CONSTRAINT ON (n:{label}) ASSERT n.{uk} IS UNIQUE"
|
|
104
|
+
queries.append(query)
|
|
105
|
+
|
|
106
|
+
return queries
|
|
107
|
+
|
|
108
|
+
def _table_name_to_label(self, table_name: str) -> str:
|
|
109
|
+
"""Convert table name to Cypher label."""
|
|
110
|
+
# Convert to PascalCase
|
|
111
|
+
return "".join(word.capitalize() for word in table_name.split("_"))
|
|
112
|
+
|
|
113
|
+
def generate_relationship_type(
|
|
114
|
+
self, to_table: str, join_table: str | None = None
|
|
115
|
+
) -> str:
|
|
116
|
+
"""Generate relationship type based on table names.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
to_table: Target table name
|
|
120
|
+
join_table: Join table name (for many-to-many relationships)
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
Relationship type in UPPER_CASE format
|
|
124
|
+
"""
|
|
125
|
+
# Table-based naming strategy
|
|
126
|
+
if join_table:
|
|
127
|
+
return self._table_name_to_label(join_table).upper()
|
|
128
|
+
else:
|
|
129
|
+
return f"HAS_{self._table_name_to_label(to_table).upper()}"
|