sqlcarbon 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcarbon/__init__.py ADDED
@@ -0,0 +1,41 @@
1
+ """
2
+ SQLcarbon
3
+ =========
4
+ Reliable, deterministic SQL Server table-to-table copy tool.
5
+
6
+ Quickstart (library usage)::
7
+
8
+ from sqlcarbon import MigrationPlan, run_plan
9
+
10
+ plan = MigrationPlan.from_yaml("plan.yaml")
11
+ summary = run_plan(plan)
12
+
13
+ # or from a dict
14
+ plan = MigrationPlan.from_dict({
15
+ "connections": { ... },
16
+ "jobs": [ ... ],
17
+ })
18
+ """
19
+ from .config_loader import (
20
+ AuthConfig,
21
+ ConnectionConfig,
22
+ Defaults,
23
+ JobConfig,
24
+ JobOptions,
25
+ MigrationPlan,
26
+ )
27
+ from .orchestrator import JobResult, RunSummary, run_plan
28
+
29
+ __version__ = "0.1.0"
30
+
31
+ __all__ = [
32
+ "MigrationPlan",
33
+ "ConnectionConfig",
34
+ "AuthConfig",
35
+ "JobConfig",
36
+ "JobOptions",
37
+ "Defaults",
38
+ "run_plan",
39
+ "RunSummary",
40
+ "JobResult",
41
+ ]
sqlcarbon/cli.py ADDED
@@ -0,0 +1,122 @@
1
+ """Command-line interface for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import sys
6
+ from datetime import datetime
7
+
8
+ import click
9
+
10
+ from .config_loader import MigrationPlan
11
+ from .orchestrator import run_plan
12
+
13
+
14
def _setup_logging() -> None:
    """Configure root logging to both a timestamped file and stdout.

    Creates one log file per invocation, named after the start time, so
    successive runs never overwrite each other's logs.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = f"sqlcarbon_{timestamp}.log"
    file_handler = logging.FileHandler(log_path, encoding="utf-8")
    console_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[file_handler, console_handler],
    )
25
+
26
+
27
# Root command group; subcommands (run / validate / init) attach themselves
# via @cli.command(). The docstring below doubles as the CLI help text.
@click.group()
@click.version_option(package_name="sqlcarbon")
def cli() -> None:
    """SQLcarbon — reliable SQL Server table-to-table copy tool."""
31
+
32
+
33
@cli.command()
@click.argument("config_file", type=click.Path(exists=True, dir_okay=False))
def run(config_file: str) -> None:
    """Run all jobs defined in CONFIG_FILE."""
    # Configure logging before anything is emitted.
    _setup_logging()
    logger = logging.getLogger(__name__)

    try:
        plan = MigrationPlan.from_yaml(config_file)
    except Exception as exc:  # boundary: surface any load/validation error
        click.echo(f"ERROR: Failed to load config '{config_file}': {exc}", err=True)
        sys.exit(1)

    connection_count = len(plan.connections)
    job_count = len(plan.jobs)
    logger.info(
        "Loaded plan: %d connection(s), %d job(s) from '%s'",
        connection_count,
        job_count,
        config_file,
    )

    summary = run_plan(plan)
    # Non-zero exit when any job failed, so schedulers can detect it.
    exit_code = 0 if summary.failed == 0 else 1
    sys.exit(exit_code)
52
+
53
+
54
@cli.command()
@click.argument("config_file", type=click.Path(exists=True, dir_okay=False))
def validate(config_file: str) -> None:
    """Validate CONFIG_FILE without running any jobs."""
    try:
        plan = MigrationPlan.from_yaml(config_file)
        message = (
            f"OK: Config is valid — "
            f"{len(plan.connections)} connection(s), {len(plan.jobs)} job(s)."
        )
        click.echo(message)
    except Exception as exc:  # any load/validation error → exit 1
        click.echo(f"ERROR: {exc}", err=True)
        sys.exit(1)
67
+
68
+
69
@cli.command()
def init() -> None:
    """Print a sample plan.yaml to stdout."""
    # Users redirect this to a file (sqlcarbon init > plan.yaml) and edit it.
    click.echo(_SAMPLE_YAML)
73
+
74
+
75
+ _SAMPLE_YAML = """\
76
+ # SQLcarbon sample plan — copy/paste and edit as needed.
77
+
78
+ connections:
79
+ source_db:
80
+ server: "sql01.example.com"
81
+ database: "SourceDB"
82
+ auth:
83
+ mode: "trusted"
84
+ # driver: "ODBC Driver 17 for SQL Server" # default; change to 18 if needed
85
+
86
+ dest_db:
87
+ server: "sql02.example.com,1445"
88
+ database: "DestDB"
89
+ auth:
90
+ mode: "sql"
91
+ username: "sa"
92
+ password: "changeme"
93
+ driver: "ODBC Driver 18 for SQL Server"
94
+
95
+ defaults:
96
+ batch_size: 100000
97
+ stop_on_failure: false
98
+ create_indexes: false
99
+ create_constraints: false
100
+ include_extended_properties: false
101
+ copy_mode: "full" # full | schema_only | data_only
102
+ nolock: true
103
+
104
+ jobs:
105
+ - name: CopyCustomers
106
+ source_connection: source_db
107
+ destination_connection: dest_db
108
+ source_table: dbo.Customers
109
+ destination_table: dbo.Customers_Archive
110
+ options:
111
+ create_indexes: true
112
+ stop_on_failure: false
113
+
114
+ - name: CopyOrders
115
+ source_connection: source_db
116
+ destination_connection: dest_db
117
+ source_table: dbo.Orders
118
+ destination_table: dbo.Orders_Archive
119
+ options:
120
+ copy_mode: "schema_only"
121
+ stop_on_failure: true
122
+ """
@@ -0,0 +1,94 @@
1
+ """Configuration models and loaders for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Literal
5
+
6
+ import yaml
7
+ from pydantic import BaseModel, Field, model_validator
8
+
9
+
10
class AuthConfig(BaseModel):
    """Authentication settings for a SQL Server connection."""

    # "trusted" = Windows integrated authentication; "sql" = SQL Server login.
    mode: Literal["trusted", "sql"] = "trusted"
    # Required (non-empty) only when mode == "sql"; ignored for trusted auth.
    username: str | None = None
    password: str | None = None

    @model_validator(mode="after")
    def _check_sql_credentials(self) -> AuthConfig:
        """Reject SQL-auth configs that are missing a username or password."""
        if self.mode == "sql" and (not self.username or not self.password):
            raise ValueError("SQL auth mode requires both username and password")
        return self
20
+
21
+
22
class ConnectionConfig(BaseModel):
    """Connection details for one SQL Server database."""

    # Host name, optionally with a ",port" suffix (e.g. "sql02.example.com,1445").
    server: str
    database: str
    auth: AuthConfig = Field(default_factory=AuthConfig)
    # ODBC driver name as registered on the host.
    driver: str = "ODBC Driver 17 for SQL Server"
    # When True, the connection string adds Encrypt=yes plus
    # TrustServerCertificate=yes (encrypt, but skip certificate validation).
    trust_server_certificate: bool = False
28
+
29
+
30
class JobOptions(BaseModel):
    """Per-job option overrides.

    Every field defaults to None, meaning "not set"; the fields mirror
    Defaults, so unset options are presumably resolved against the plan-level
    Defaults by the orchestrator — confirm against orchestrator code.
    """

    batch_size: int | None = None
    create_indexes: bool | None = None
    create_constraints: bool | None = None
    include_extended_properties: bool | None = None
    stop_on_failure: bool | None = None
    copy_mode: Literal["full", "schema_only", "data_only"] | None = None
37
+
38
+
39
class JobConfig(BaseModel):
    """One table-to-table copy job."""

    name: str
    # Keys into MigrationPlan.connections; existence is validated there.
    source_connection: str
    destination_connection: str
    # "schema.table" style references (e.g. "dbo.Customers").
    source_table: str
    destination_table: str
    options: JobOptions = Field(default_factory=JobOptions)
46
+
47
+
48
class Defaults(BaseModel):
    """Plan-wide default job settings (overridable per job via JobOptions)."""

    batch_size: int = 100000
    stop_on_failure: bool = False
    create_indexes: bool = False
    create_constraints: bool = False
    include_extended_properties: bool = False
    copy_mode: Literal["full", "schema_only", "data_only"] = "full"
    # When True the data copy reads source tables WITH (NOLOCK).
    nolock: bool = True
56
+
57
+
58
class MigrationPlan(BaseModel):
    """Top-level plan: named connections, the jobs to run, and plan defaults."""

    connections: dict[str, ConnectionConfig]
    jobs: list[JobConfig]
    defaults: Defaults = Field(default_factory=Defaults)

    @model_validator(mode="after")
    def _validate_job_connections(self) -> MigrationPlan:
        """Ensure every job references a connection that is actually defined."""
        for job in self.jobs:
            if job.source_connection not in self.connections:
                raise ValueError(
                    f"Job '{job.name}': source_connection '{job.source_connection}' "
                    f"is not defined in connections"
                )
            if job.destination_connection not in self.connections:
                raise ValueError(
                    f"Job '{job.name}': destination_connection '{job.destination_connection}' "
                    f"is not defined in connections"
                )
        return self

    @staticmethod
    def _ensure_mapping(data: object) -> dict:
        """Reject empty or non-mapping YAML documents with a clear error.

        yaml.safe_load returns None for an empty document (and scalars/lists
        for malformed plans); feeding those to model_validate produces a
        confusing pydantic error, so fail early with an actionable message.
        """
        if not isinstance(data, dict):
            raise ValueError(
                "Plan must be a YAML mapping with 'connections' and 'jobs' "
                f"keys, got {type(data).__name__}"
            )
        return data

    @classmethod
    def from_yaml(cls, path: str) -> MigrationPlan:
        """Load a MigrationPlan from a YAML file path.

        Raises ValueError for empty/non-mapping documents and pydantic's
        ValidationError for schema violations.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return cls.model_validate(cls._ensure_mapping(data))

    @classmethod
    def from_yaml_string(cls, text: str) -> MigrationPlan:
        """Load a MigrationPlan from a YAML string."""
        return cls.model_validate(cls._ensure_mapping(yaml.safe_load(text)))

    @classmethod
    def from_dict(cls, data: dict) -> MigrationPlan:
        """Load a MigrationPlan from a Python dict."""
        return cls.model_validate(data)
@@ -0,0 +1,30 @@
1
+ """Connection building and management for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ import pyodbc
5
+
6
+ from .config_loader import ConnectionConfig
7
+
8
+
9
def _quote_odbc_value(value: str) -> str:
    """Brace-quote a connection-string value when it needs it.

    Per ODBC connection-string syntax, a value containing ';', '{', '}', or
    leading/trailing spaces must be wrapped in '{...}' with any literal '}'
    doubled. Plain values are returned unchanged, so output for ordinary
    credentials is identical to the previous behavior.
    """
    needs_quoting = any(ch in value for ch in ";{}") or value != value.strip()
    if not needs_quoting:
        return value
    return "{" + value.replace("}", "}}") + "}"


def build_connection_string(cfg: ConnectionConfig) -> str:
    """Build a pyodbc connection string from a ConnectionConfig.

    Uses integrated authentication when cfg.auth.mode == "trusted", otherwise
    embeds the SQL login credentials. Bug fix: credentials containing ODBC
    reserved characters (e.g. a password with ';') are now brace-quoted so
    they no longer corrupt the key=value string.
    """
    parts = [
        f"DRIVER={{{cfg.driver}}}",
        f"SERVER={cfg.server}",
        f"DATABASE={cfg.database}",
    ]
    if cfg.auth.mode == "trusted":
        parts.append("Trusted_Connection=yes")
    else:
        parts.append(f"UID={_quote_odbc_value(cfg.auth.username)}")
        parts.append(f"PWD={_quote_odbc_value(cfg.auth.password)}")
    if cfg.trust_server_certificate:
        # Encrypt the channel but skip certificate validation (self-signed certs).
        parts.append("Encrypt=yes")
        parts.append("TrustServerCertificate=yes")
    return ";".join(parts)
25
+
26
+
27
def get_connection(cfg: ConnectionConfig, autocommit: bool = False) -> pyodbc.Connection:
    """Open and return a pyodbc connection for the given configuration.

    autocommit defaults to False so callers control transaction boundaries.
    """
    return pyodbc.connect(build_connection_string(cfg), autocommit=autocommit)
sqlcarbon/copier.py ADDED
@@ -0,0 +1,124 @@
1
+ """Chunked data copy engine for SQLcarbon."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from decimal import Decimal
6
+
7
+ import pyodbc
8
+
9
+ from .schema_reader import SchemaInfo, parse_table_ref
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class PartialCopyError(RuntimeError):
    """Raised when a batch insert fails after some rows were already committed.

    Attributes:
        rows_committed: number of rows durably written before the failure,
            preserved so callers can report or resume from the partial state.
    """

    def __init__(self, message: str, rows_committed: int) -> None:
        super().__init__(message)
        self.rows_committed = rows_committed
20
+
21
+
22
def copy_data(
    src_conn: pyodbc.Connection,
    dst_conn: pyodbc.Connection,
    src_table_ref: str,
    dst_table_ref: str,
    schema_info: SchemaInfo,
    batch_size: int,
    nolock: bool,
    job_name: str,
) -> int:
    """
    Stream rows from source to destination in chunks.

    Args:
        src_conn / dst_conn: open pyodbc connections; dst_conn must support
            explicit commit/rollback (batches are committed one at a time).
        src_table_ref / dst_table_ref: "schema.table" style references.
        schema_info: column/identity metadata for the source table.
        batch_size: rows fetched and inserted per round trip.
        nolock: when True, the source SELECT uses WITH (NOLOCK).
        job_name: label used to prefix log messages.

    Returns the total number of rows successfully inserted.
    Raises PartialCopyError if a batch fails after rows have already been committed.
    Raises RuntimeError for failures before any rows are committed.
    """
    src_schema, src_table = parse_table_ref(src_table_ref)
    dst_schema, dst_table = parse_table_ref(dst_table_ref)

    # Only non-computed columns can be SELECTed and INSERTed explicitly
    cols = schema_info.copyable_columns
    col_names = [f"[{col.name}]" for col in cols]

    nolock_hint = " WITH (NOLOCK)" if nolock else ""
    select_sql = (
        f"SELECT {', '.join(col_names)} "
        f"FROM [{src_schema}].[{src_table}]{nolock_hint};"
    )
    params_placeholder = ", ".join(["?"] * len(col_names))
    insert_sql = (
        f"INSERT INTO [{dst_schema}].[{dst_table}] WITH (TABLOCK) "
        f"({', '.join(col_names)}) VALUES ({params_placeholder});"
    )

    src_cursor = src_conn.cursor()
    dst_cursor = dst_conn.cursor()
    # Batch parameter arrays client-side for a large executemany speedup.
    dst_cursor.fast_executemany = True

    identity_col = schema_info.identity_column
    if identity_col:
        logger.info(
            "[%s] Setting IDENTITY_INSERT ON for [%s].[%s].",
            job_name, dst_schema, dst_table,
        )
        dst_cursor.execute(f"SET IDENTITY_INSERT [{dst_schema}].[{dst_table}] ON;")

    total_rows = 0

    try:
        # Bug fix: the SELECT now executes inside the try/finally so a failure
        # here (bad column list, permissions, dropped table) still restores
        # IDENTITY_INSERT OFF and closes both cursors. Previously it ran before
        # the try block and skipped all cleanup on error.
        src_cursor.execute(select_sql)

        while True:
            rows = src_cursor.fetchmany(batch_size)
            if not rows:
                break

            # pyodbc requires plain Python types; convert Decimal to str
            processed = [
                tuple(str(v) if isinstance(v, Decimal) else v for v in row)
                for row in rows
            ]

            try:
                dst_cursor.executemany(insert_sql, processed)
                dst_conn.commit()
                total_rows += len(rows)
                logger.info("[%s] ... %s rows inserted.", job_name, f"{total_rows:,}")
            except Exception as batch_err:
                # Roll back the failed batch; earlier batches are already durable.
                try:
                    dst_conn.rollback()
                except Exception:
                    pass
                msg = (
                    f"Batch insert failed after {total_rows:,} rows were committed. "
                    f"Destination table [{dst_schema}].[{dst_table}] contains incomplete data. "
                    f"Underlying error: {batch_err}"
                )
                if total_rows > 0:
                    raise PartialCopyError(msg, total_rows) from batch_err
                raise RuntimeError(msg) from batch_err

    finally:
        if identity_col:
            try:
                dst_cursor.execute(
                    f"SET IDENTITY_INSERT [{dst_schema}].[{dst_table}] OFF;"
                )
                dst_conn.commit()
                logger.info("[%s] IDENTITY_INSERT OFF.", job_name)
            except Exception as exc:
                # Best-effort: the session is being torn down anyway.
                logger.warning(
                    "[%s] Could not SET IDENTITY_INSERT OFF: %s", job_name, exc
                )
        try:
            src_cursor.close()
        except Exception:
            pass
        try:
            dst_cursor.close()
        except Exception:
            pass

    return total_rows
@@ -0,0 +1,125 @@
1
+ """DDL generation from SchemaInfo objects."""
2
+ from __future__ import annotations
3
+
4
+ from .schema_reader import ColumnInfo, SchemaInfo, parse_table_ref
5
+
6
+
7
+ def _column_type_str(col: ColumnInfo) -> str:
8
+ dt = col.data_type
9
+ if dt in ("varchar", "nvarchar", "char", "nchar", "binary", "varbinary"):
10
+ length = "max" if col.char_length == -1 else str(col.char_length)
11
+ return f"{dt}({length})"
12
+ if dt in ("decimal", "numeric"):
13
+ return f"{dt}({col.numeric_precision}, {col.numeric_scale})"
14
+ if dt in ("datetime2", "datetimeoffset", "time"):
15
+ return f"{dt}({col.datetime_precision})"
16
+ return dt
17
+
18
+
19
def generate_create_table(schema_info: SchemaInfo, dest_table_ref: str) -> str:
    """Return a CREATE TABLE statement for the destination table."""
    dest_schema, dest_table = parse_table_ref(dest_table_ref)

    definitions: list[str] = []
    for col in schema_info.columns:
        if col.is_computed:
            # Computed columns re-use the source expression verbatim.
            line = f"    [{col.name}] AS {col.computed_definition}"
            if col.computed_is_persisted:
                line += " PERSISTED"
        else:
            line = f"    [{col.name}] {_column_type_str(col)}"
            if col.is_identity:
                seed = 1 if col.identity_seed is None else col.identity_seed
                increment = 1 if col.identity_increment is None else col.identity_increment
                line += f" IDENTITY({seed},{increment})"
            line += " NULL" if col.is_nullable else " NOT NULL"
        definitions.append(line)

    body = ",\n".join(definitions)
    return f"CREATE TABLE [{dest_schema}].[{dest_table}] (\n{body}\n);"
41
+
42
+
43
def generate_create_indexes(schema_info: SchemaInfo, dest_table_ref: str) -> list[str]:
    """Return CREATE INDEX / ALTER TABLE ADD CONSTRAINT PRIMARY KEY statements."""
    dest_schema, dest_table = parse_table_ref(dest_table_ref)
    statements: list[str] = []

    for idx in schema_info.indexes:
        key_parts = [
            f"[{c.name}] {'DESC' if c.is_descending else 'ASC'}"
            for c in idx.columns
            if not c.is_included
        ]
        included = [f"[{c.name}]" for c in idx.columns if c.is_included]
        key_list = ", ".join(key_parts)

        if idx.is_primary_key:
            # Primary keys are recreated as constraints rather than plain indexes.
            statements.append(
                f"ALTER TABLE [{dest_schema}].[{dest_table}]\n"
                f"    ADD CONSTRAINT [{idx.name}] PRIMARY KEY {idx.type_desc}\n"
                f"    ({key_list});"
            )
            continue

        unique = "UNIQUE " if idx.is_unique else ""
        stmt = (
            f"CREATE {unique}{idx.type_desc} INDEX [{idx.name}]\n"
            f"    ON [{dest_schema}].[{dest_table}] ({key_list})"
        )
        if included:
            stmt += f"\n    INCLUDE ({', '.join(included)})"
        statements.append(stmt + ";")

    return statements
76
+
77
+
78
def generate_add_constraints(schema_info: SchemaInfo, dest_table_ref: str) -> list[str]:
    """Return ALTER TABLE ADD CONSTRAINT statements for check and default constraints."""
    dest_schema, dest_table = parse_table_ref(dest_table_ref)
    table = f"[{dest_schema}].[{dest_table}]"

    # Check constraints first, then defaults — same order the caller relies on.
    check_statements = [
        f"ALTER TABLE {table}\n"
        f"    ADD CONSTRAINT [{cc.name}] CHECK {cc.definition};"
        for cc in schema_info.check_constraints
    ]
    default_statements = [
        f"ALTER TABLE {table}\n"
        f"    ADD CONSTRAINT [{dc.name}] DEFAULT {dc.definition}"
        f" FOR [{dc.column_name}];"
        for dc in schema_info.default_constraints
    ]
    return check_statements + default_statements
97
+
98
+
99
def generate_extended_properties(
    schema_info: SchemaInfo, dest_table_ref: str
) -> list[str]:
    """Return sys.sp_addextendedproperty EXEC statements for the destination table.

    Table-level properties (falsy column_name) omit the @level2 arguments;
    column-level properties include them. Bug fix: every value interpolated
    into an N'...' literal (property name, value, and column name) is now
    quote-escaped — previously only the value was, so a property name or
    column name containing a single quote produced invalid T-SQL.
    """
    dest_schema, dest_table = parse_table_ref(dest_table_ref)

    def esc(text: object) -> str:
        # Double single quotes for safe embedding in an N'...' literal.
        return str(text).replace("'", "''")

    statements: list[str] = []
    for ep in schema_info.extended_properties:
        sql = (
            f"EXEC sys.sp_addextendedproperty\n"
            f"    @name = N'{esc(ep.name)}', @value = N'{esc(ep.value)}',\n"
            f"    @level0type = N'Schema', @level0name = N'{dest_schema}',\n"
            f"    @level1type = N'Table', @level1name = N'{dest_table}'"
        )
        if ep.column_name:
            sql += (
                f",\n    @level2type = N'Column', "
                f"@level2name = N'{esc(ep.column_name)}'"
            )
        statements.append(sql + ";")

    return statements
+ return statements