sqlcarbon 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcarbon/__init__.py ADDED
@@ -0,0 +1,41 @@
1
+ """
2
+ SQLcarbon
3
+ =========
4
+ Reliable, deterministic SQL Server table-to-table copy tool.
5
+
6
+ Quickstart (library usage)::
7
+
8
+ from sqlcarbon import MigrationPlan, run_plan
9
+
10
+ plan = MigrationPlan.from_yaml("plan.yaml")
11
+ summary = run_plan(plan)
12
+
13
+ # or from a dict
14
+ plan = MigrationPlan.from_dict({
15
+ "connections": { ... },
16
+ "jobs": [ ... ],
17
+ })
18
+ """
19
+ from .config_loader import (
20
+ AuthConfig,
21
+ ConnectionConfig,
22
+ Defaults,
23
+ JobConfig,
24
+ JobOptions,
25
+ MigrationPlan,
26
+ )
27
+ from .orchestrator import JobResult, RunSummary, run_plan
28
+
29
+ __version__ = "0.1.0"
30
+
31
+ __all__ = [
32
+ "MigrationPlan",
33
+ "ConnectionConfig",
34
+ "AuthConfig",
35
+ "JobConfig",
36
+ "JobOptions",
37
+ "Defaults",
38
+ "run_plan",
39
+ "RunSummary",
40
+ "JobResult",
41
+ ]
sqlcarbon/cli.py ADDED
@@ -0,0 +1,122 @@
1
+ """Command-line interface for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import sys
6
+ from datetime import datetime
7
+
8
+ import click
9
+
10
+ from .config_loader import MigrationPlan
11
+ from .orchestrator import run_plan
12
+
13
+
14
def _setup_logging() -> None:
    """Configure root logging to both a timestamped file and stdout.

    Creates one log file per invocation, named after the start time, so
    successive runs never overwrite each other's logs.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = f"sqlcarbon_{timestamp}.log"
    file_handler = logging.FileHandler(log_path, encoding="utf-8")
    console_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[file_handler, console_handler],
    )
25
+
26
+
27
# Root command group; subcommands (run / validate / init) attach themselves
# via @cli.command(). The docstring below doubles as the CLI help text.
@click.group()
@click.version_option(package_name="sqlcarbon")
def cli() -> None:
    """SQLcarbon — reliable SQL Server table-to-table copy tool."""
31
+
32
+
33
@cli.command()
@click.argument("config_file", type=click.Path(exists=True, dir_okay=False))
def run(config_file: str) -> None:
    """Run all jobs defined in CONFIG_FILE."""
    # Configure logging before anything is emitted.
    _setup_logging()
    logger = logging.getLogger(__name__)

    try:
        plan = MigrationPlan.from_yaml(config_file)
    except Exception as exc:  # boundary: surface any load/validation error
        click.echo(f"ERROR: Failed to load config '{config_file}': {exc}", err=True)
        sys.exit(1)

    connection_count = len(plan.connections)
    job_count = len(plan.jobs)
    logger.info(
        "Loaded plan: %d connection(s), %d job(s) from '%s'",
        connection_count,
        job_count,
        config_file,
    )

    summary = run_plan(plan)
    # Non-zero exit when any job failed, so schedulers can detect it.
    exit_code = 0 if summary.failed == 0 else 1
    sys.exit(exit_code)
52
+
53
+
54
@cli.command()
@click.argument("config_file", type=click.Path(exists=True, dir_okay=False))
def validate(config_file: str) -> None:
    """Validate CONFIG_FILE without running any jobs."""
    try:
        plan = MigrationPlan.from_yaml(config_file)
        message = (
            f"OK: Config is valid — "
            f"{len(plan.connections)} connection(s), {len(plan.jobs)} job(s)."
        )
        click.echo(message)
    except Exception as exc:  # any load/validation error → exit 1
        click.echo(f"ERROR: {exc}", err=True)
        sys.exit(1)
67
+
68
+
69
@cli.command()
def init() -> None:
    """Print a sample plan.yaml to stdout."""
    # Users redirect this to a file (sqlcarbon init > plan.yaml) and edit it.
    click.echo(_SAMPLE_YAML)
73
+
74
+
75
+ _SAMPLE_YAML = """\
76
+ # SQLcarbon sample plan — copy/paste and edit as needed.
77
+
78
+ connections:
79
+ source_db:
80
+ server: "sql01.example.com"
81
+ database: "SourceDB"
82
+ auth:
83
+ mode: "trusted"
84
+ # driver: "ODBC Driver 17 for SQL Server" # default; change to 18 if needed
85
+
86
+ dest_db:
87
+ server: "sql02.example.com,1445"
88
+ database: "DestDB"
89
+ auth:
90
+ mode: "sql"
91
+ username: "sa"
92
+ password: "changeme"
93
+ driver: "ODBC Driver 18 for SQL Server"
94
+
95
+ defaults:
96
+ batch_size: 100000
97
+ stop_on_failure: false
98
+ create_indexes: false
99
+ create_constraints: false
100
+ include_extended_properties: false
101
+ copy_mode: "full" # full | schema_only | data_only
102
+ nolock: true
103
+
104
+ jobs:
105
+ - name: CopyCustomers
106
+ source_connection: source_db
107
+ destination_connection: dest_db
108
+ source_table: dbo.Customers
109
+ destination_table: dbo.Customers_Archive
110
+ options:
111
+ create_indexes: true
112
+ stop_on_failure: false
113
+
114
+ - name: CopyOrders
115
+ source_connection: source_db
116
+ destination_connection: dest_db
117
+ source_table: dbo.Orders
118
+ destination_table: dbo.Orders_Archive
119
+ options:
120
+ copy_mode: "schema_only"
121
+ stop_on_failure: true
122
+ """
@@ -0,0 +1,94 @@
1
+ """Configuration models and loaders for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Literal
5
+
6
+ import yaml
7
+ from pydantic import BaseModel, Field, model_validator
8
+
9
+
10
class AuthConfig(BaseModel):
    """Authentication settings for a SQL Server connection."""

    # "trusted" = Windows integrated authentication; "sql" = SQL Server login.
    mode: Literal["trusted", "sql"] = "trusted"
    # Required (non-empty) only when mode == "sql"; ignored for trusted auth.
    username: str | None = None
    password: str | None = None

    @model_validator(mode="after")
    def _check_sql_credentials(self) -> AuthConfig:
        """Reject SQL-auth configs that are missing a username or password."""
        if self.mode == "sql" and (not self.username or not self.password):
            raise ValueError("SQL auth mode requires both username and password")
        return self
20
+
21
+
22
class ConnectionConfig(BaseModel):
    """Connection details for one SQL Server database."""

    # Host name, optionally with a ",port" suffix (e.g. "sql02.example.com,1445").
    server: str
    database: str
    auth: AuthConfig = Field(default_factory=AuthConfig)
    # ODBC driver name as registered on the host.
    driver: str = "ODBC Driver 17 for SQL Server"
    # When True, the connection string adds Encrypt=yes plus
    # TrustServerCertificate=yes (encrypt, but skip certificate validation).
    trust_server_certificate: bool = False
28
+
29
+
30
class JobOptions(BaseModel):
    """Per-job option overrides.

    Every field defaults to None, meaning "not set"; the fields mirror
    Defaults, so unset options are presumably resolved against the plan-level
    Defaults by the orchestrator — confirm against orchestrator code.
    """

    batch_size: int | None = None
    create_indexes: bool | None = None
    create_constraints: bool | None = None
    include_extended_properties: bool | None = None
    stop_on_failure: bool | None = None
    copy_mode: Literal["full", "schema_only", "data_only"] | None = None
37
+
38
+
39
class JobConfig(BaseModel):
    """One table-to-table copy job."""

    name: str
    # Keys into MigrationPlan.connections; existence is validated there.
    source_connection: str
    destination_connection: str
    # "schema.table" style references (e.g. "dbo.Customers").
    source_table: str
    destination_table: str
    options: JobOptions = Field(default_factory=JobOptions)
46
+
47
+
48
class Defaults(BaseModel):
    """Plan-wide default job settings (overridable per job via JobOptions)."""

    batch_size: int = 100000
    stop_on_failure: bool = False
    create_indexes: bool = False
    create_constraints: bool = False
    include_extended_properties: bool = False
    copy_mode: Literal["full", "schema_only", "data_only"] = "full"
    # When True the data copy reads source tables WITH (NOLOCK).
    nolock: bool = True
56
+
57
+
58
class MigrationPlan(BaseModel):
    """Top-level plan: named connections, the jobs to run, and plan defaults."""

    connections: dict[str, ConnectionConfig]
    jobs: list[JobConfig]
    defaults: Defaults = Field(default_factory=Defaults)

    @model_validator(mode="after")
    def _validate_job_connections(self) -> MigrationPlan:
        """Ensure every job references a connection that is actually defined."""
        for job in self.jobs:
            if job.source_connection not in self.connections:
                raise ValueError(
                    f"Job '{job.name}': source_connection '{job.source_connection}' "
                    f"is not defined in connections"
                )
            if job.destination_connection not in self.connections:
                raise ValueError(
                    f"Job '{job.name}': destination_connection '{job.destination_connection}' "
                    f"is not defined in connections"
                )
        return self

    @staticmethod
    def _ensure_mapping(data: object) -> dict:
        """Reject empty or non-mapping YAML documents with a clear error.

        yaml.safe_load returns None for an empty document (and scalars/lists
        for malformed plans); feeding those to model_validate produces a
        confusing pydantic error, so fail early with an actionable message.
        """
        if not isinstance(data, dict):
            raise ValueError(
                "Plan must be a YAML mapping with 'connections' and 'jobs' "
                f"keys, got {type(data).__name__}"
            )
        return data

    @classmethod
    def from_yaml(cls, path: str) -> MigrationPlan:
        """Load a MigrationPlan from a YAML file path.

        Raises ValueError for empty/non-mapping documents and pydantic's
        ValidationError for schema violations.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return cls.model_validate(cls._ensure_mapping(data))

    @classmethod
    def from_yaml_string(cls, text: str) -> MigrationPlan:
        """Load a MigrationPlan from a YAML string."""
        return cls.model_validate(cls._ensure_mapping(yaml.safe_load(text)))

    @classmethod
    def from_dict(cls, data: dict) -> MigrationPlan:
        """Load a MigrationPlan from a Python dict."""
        return cls.model_validate(data)
@@ -0,0 +1,30 @@
1
+ """Connection building and management for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ import pyodbc
5
+
6
+ from .config_loader import ConnectionConfig
7
+
8
+
9
def _quote_odbc_value(value: str) -> str:
    """Brace-quote a connection-string value when it needs it.

    Per ODBC connection-string syntax, a value containing ';', '{', '}', or
    leading/trailing spaces must be wrapped in '{...}' with any literal '}'
    doubled. Plain values are returned unchanged, so output for ordinary
    credentials is identical to the previous behavior.
    """
    needs_quoting = any(ch in value for ch in ";{}") or value != value.strip()
    if not needs_quoting:
        return value
    return "{" + value.replace("}", "}}") + "}"


def build_connection_string(cfg: ConnectionConfig) -> str:
    """Build a pyodbc connection string from a ConnectionConfig.

    Uses integrated authentication when cfg.auth.mode == "trusted", otherwise
    embeds the SQL login credentials. Bug fix: credentials containing ODBC
    reserved characters (e.g. a password with ';') are now brace-quoted so
    they no longer corrupt the key=value string.
    """
    parts = [
        f"DRIVER={{{cfg.driver}}}",
        f"SERVER={cfg.server}",
        f"DATABASE={cfg.database}",
    ]
    if cfg.auth.mode == "trusted":
        parts.append("Trusted_Connection=yes")
    else:
        parts.append(f"UID={_quote_odbc_value(cfg.auth.username)}")
        parts.append(f"PWD={_quote_odbc_value(cfg.auth.password)}")
    if cfg.trust_server_certificate:
        # Encrypt the channel but skip certificate validation (self-signed certs).
        parts.append("Encrypt=yes")
        parts.append("TrustServerCertificate=yes")
    return ";".join(parts)
25
+
26
+
27
def get_connection(cfg: ConnectionConfig, autocommit: bool = False) -> pyodbc.Connection:
    """Open and return a pyodbc connection for the given configuration.

    autocommit defaults to False so callers control transaction boundaries.
    """
    return pyodbc.connect(build_connection_string(cfg), autocommit=autocommit)
sqlcarbon/copier.py ADDED
@@ -0,0 +1,124 @@
1
+ """Chunked data copy engine for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from decimal import Decimal
6
+
7
+ import pyodbc
8
+
9
+ from .schema_reader import SchemaInfo, parse_table_ref
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class PartialCopyError(RuntimeError):
    """Raised when a batch insert fails after some rows were already committed.

    Attributes:
        rows_committed: number of rows durably written before the failure,
            preserved so callers can report or resume from the partial state.
    """

    def __init__(self, message: str, rows_committed: int) -> None:
        super().__init__(message)
        self.rows_committed = rows_committed
20
+
21
+
22
def copy_data(
    src_conn: pyodbc.Connection,
    dst_conn: pyodbc.Connection,
    src_table_ref: str,
    dst_table_ref: str,
    schema_info: SchemaInfo,
    batch_size: int,
    nolock: bool,
    job_name: str,
) -> int:
    """
    Stream rows from source to destination in chunks.

    Args:
        src_conn / dst_conn: open pyodbc connections; dst_conn must support
            explicit commit/rollback (batches are committed one at a time).
        src_table_ref / dst_table_ref: "schema.table" style references.
        schema_info: column/identity metadata for the source table.
        batch_size: rows fetched and inserted per round trip.
        nolock: when True, the source SELECT uses WITH (NOLOCK).
        job_name: label used to prefix log messages.

    Returns the total number of rows successfully inserted.
    Raises PartialCopyError if a batch fails after rows have already been committed.
    Raises RuntimeError for failures before any rows are committed.
    """
    src_schema, src_table = parse_table_ref(src_table_ref)
    dst_schema, dst_table = parse_table_ref(dst_table_ref)

    # Only non-computed columns can be SELECTed and INSERTed explicitly
    cols = schema_info.copyable_columns
    col_names = [f"[{col.name}]" for col in cols]

    nolock_hint = " WITH (NOLOCK)" if nolock else ""
    select_sql = (
        f"SELECT {', '.join(col_names)} "
        f"FROM [{src_schema}].[{src_table}]{nolock_hint};"
    )
    params_placeholder = ", ".join(["?"] * len(col_names))
    insert_sql = (
        f"INSERT INTO [{dst_schema}].[{dst_table}] WITH (TABLOCK) "
        f"({', '.join(col_names)}) VALUES ({params_placeholder});"
    )

    src_cursor = src_conn.cursor()
    dst_cursor = dst_conn.cursor()
    # Batch parameter arrays client-side for a large executemany speedup.
    dst_cursor.fast_executemany = True

    identity_col = schema_info.identity_column
    if identity_col:
        logger.info(
            "[%s] Setting IDENTITY_INSERT ON for [%s].[%s].",
            job_name, dst_schema, dst_table,
        )
        dst_cursor.execute(f"SET IDENTITY_INSERT [{dst_schema}].[{dst_table}] ON;")

    total_rows = 0

    try:
        # Bug fix: the SELECT now executes inside the try/finally so a failure
        # here (bad column list, permissions, dropped table) still restores
        # IDENTITY_INSERT OFF and closes both cursors. Previously it ran before
        # the try block and skipped all cleanup on error.
        src_cursor.execute(select_sql)

        while True:
            rows = src_cursor.fetchmany(batch_size)
            if not rows:
                break

            # pyodbc requires plain Python types; convert Decimal to str
            processed = [
                tuple(str(v) if isinstance(v, Decimal) else v for v in row)
                for row in rows
            ]

            try:
                dst_cursor.executemany(insert_sql, processed)
                dst_conn.commit()
                total_rows += len(rows)
                logger.info("[%s] ... %s rows inserted.", job_name, f"{total_rows:,}")
            except Exception as batch_err:
                # Roll back the failed batch; earlier batches are already durable.
                try:
                    dst_conn.rollback()
                except Exception:
                    pass
                msg = (
                    f"Batch insert failed after {total_rows:,} rows were committed. "
                    f"Destination table [{dst_schema}].[{dst_table}] contains incomplete data. "
                    f"Underlying error: {batch_err}"
                )
                if total_rows > 0:
                    raise PartialCopyError(msg, total_rows) from batch_err
                raise RuntimeError(msg) from batch_err

    finally:
        if identity_col:
            try:
                dst_cursor.execute(
                    f"SET IDENTITY_INSERT [{dst_schema}].[{dst_table}] OFF;"
                )
                dst_conn.commit()
                logger.info("[%s] IDENTITY_INSERT OFF.", job_name)
            except Exception as exc:
                # Best-effort: the session is being torn down anyway.
                logger.warning(
                    "[%s] Could not SET IDENTITY_INSERT OFF: %s", job_name, exc
                )
        try:
            src_cursor.close()
        except Exception:
            pass
        try:
            dst_cursor.close()
        except Exception:
            pass

    return total_rows
@@ -0,0 +1,125 @@
1
+ """DDL generation from SchemaInfo objects."""
2
+ from __future__ import annotations
3
+
4
+ from .schema_reader import ColumnInfo, SchemaInfo, parse_table_ref
5
+
6
+
7
+ def _column_type_str(col: ColumnInfo) -> str:
8
+ dt = col.data_type
9
+ if dt in ("varchar", "nvarchar", "char", "nchar", "binary", "varbinary"):
10
+ length = "max" if col.char_length == -1 else str(col.char_length)
11
+ return f"{dt}({length})"
12
+ if dt in ("decimal", "numeric"):
13
+ return f"{dt}({col.numeric_precision}, {col.numeric_scale})"
14
+ if dt in ("datetime2", "datetimeoffset", "time"):
15
+ return f"{dt}({col.datetime_precision})"
16
+ return dt
17
+
18
+
19
def generate_create_table(schema_info: SchemaInfo, dest_table_ref: str) -> str:
    """Return a CREATE TABLE statement for the destination table."""
    dest_schema, dest_table = parse_table_ref(dest_table_ref)

    definitions: list[str] = []
    for col in schema_info.columns:
        if col.is_computed:
            # Computed columns re-use the source expression verbatim.
            line = f"    [{col.name}] AS {col.computed_definition}"
            if col.computed_is_persisted:
                line += " PERSISTED"
        else:
            line = f"    [{col.name}] {_column_type_str(col)}"
            if col.is_identity:
                seed = 1 if col.identity_seed is None else col.identity_seed
                increment = 1 if col.identity_increment is None else col.identity_increment
                line += f" IDENTITY({seed},{increment})"
            line += " NULL" if col.is_nullable else " NOT NULL"
        definitions.append(line)

    body = ",\n".join(definitions)
    return f"CREATE TABLE [{dest_schema}].[{dest_table}] (\n{body}\n);"
41
+
42
+
43
def generate_create_indexes(schema_info: SchemaInfo, dest_table_ref: str) -> list[str]:
    """Return CREATE INDEX / ALTER TABLE ADD CONSTRAINT PRIMARY KEY statements."""
    dest_schema, dest_table = parse_table_ref(dest_table_ref)
    statements: list[str] = []

    for idx in schema_info.indexes:
        key_parts = [
            f"[{c.name}] {'DESC' if c.is_descending else 'ASC'}"
            for c in idx.columns
            if not c.is_included
        ]
        included = [f"[{c.name}]" for c in idx.columns if c.is_included]
        key_list = ", ".join(key_parts)

        if idx.is_primary_key:
            # Primary keys are recreated as constraints rather than plain indexes.
            statements.append(
                f"ALTER TABLE [{dest_schema}].[{dest_table}]\n"
                f"    ADD CONSTRAINT [{idx.name}] PRIMARY KEY {idx.type_desc}\n"
                f"    ({key_list});"
            )
            continue

        unique = "UNIQUE " if idx.is_unique else ""
        stmt = (
            f"CREATE {unique}{idx.type_desc} INDEX [{idx.name}]\n"
            f"    ON [{dest_schema}].[{dest_table}] ({key_list})"
        )
        if included:
            stmt += f"\n    INCLUDE ({', '.join(included)})"
        statements.append(stmt + ";")

    return statements
76
+
77
+
78
def generate_add_constraints(schema_info: SchemaInfo, dest_table_ref: str) -> list[str]:
    """Return ALTER TABLE ADD CONSTRAINT statements for check and default constraints."""
    dest_schema, dest_table = parse_table_ref(dest_table_ref)
    table = f"[{dest_schema}].[{dest_table}]"

    # Check constraints first, then defaults — same order the caller relies on.
    check_statements = [
        f"ALTER TABLE {table}\n"
        f"    ADD CONSTRAINT [{cc.name}] CHECK {cc.definition};"
        for cc in schema_info.check_constraints
    ]
    default_statements = [
        f"ALTER TABLE {table}\n"
        f"    ADD CONSTRAINT [{dc.name}] DEFAULT {dc.definition}"
        f" FOR [{dc.column_name}];"
        for dc in schema_info.default_constraints
    ]
    return check_statements + default_statements
97
+
98
+
99
def generate_extended_properties(
    schema_info: SchemaInfo, dest_table_ref: str
) -> list[str]:
    """Return sys.sp_addextendedproperty EXEC statements for the destination table.

    Table-level properties (falsy column_name) omit the @level2 arguments;
    column-level properties include them. Bug fix: every value interpolated
    into an N'...' literal (property name, value, and column name) is now
    quote-escaped — previously only the value was, so a property name or
    column name containing a single quote produced invalid T-SQL.
    """
    dest_schema, dest_table = parse_table_ref(dest_table_ref)

    def esc(text: object) -> str:
        # Double single quotes for safe embedding in an N'...' literal.
        return str(text).replace("'", "''")

    statements: list[str] = []
    for ep in schema_info.extended_properties:
        sql = (
            f"EXEC sys.sp_addextendedproperty\n"
            f"    @name = N'{esc(ep.name)}', @value = N'{esc(ep.value)}',\n"
            f"    @level0type = N'Schema', @level0name = N'{dest_schema}',\n"
            f"    @level1type = N'Table', @level1name = N'{dest_table}'"
        )
        if ep.column_name:
            sql += (
                f",\n    @level2type = N'Column', "
                f"@level2name = N'{esc(ep.column_name)}'"
            )
        statements.append(sql + ";")

    return statements
+ return statements