PyPI - starrocks-br - Versions diffs - 0.1.0__py3-none-any.whl - Mend

starrocks-br 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

starrocks_br/__init__.py +1 -0
starrocks_br/cli.py +385 -0
starrocks_br/concurrency.py +177 -0
starrocks_br/config.py +41 -0
starrocks_br/db.py +88 -0
starrocks_br/executor.py +245 -0
starrocks_br/health.py +34 -0
starrocks_br/history.py +93 -0
starrocks_br/labels.py +52 -0
starrocks_br/logger.py +36 -0
starrocks_br/planner.py +280 -0
starrocks_br/repository.py +36 -0
starrocks_br/restore.py +493 -0
starrocks_br/schema.py +144 -0
starrocks_br-0.1.0.dist-info/METADATA +12 -0
starrocks_br-0.1.0.dist-info/RECORD +19 -0
starrocks_br-0.1.0.dist-info/WHEEL +5 -0
starrocks_br-0.1.0.dist-info/entry_points.txt +2 -0
starrocks_br-0.1.0.dist-info/top_level.txt +1 -0

starrocks_br/executor.py ADDED Viewed

@@ -0,0 +1,245 @@
+import time
+import datetime
+from typing import Dict, Literal, Optional
+from . import history, concurrency, logger
+# Default cap on status polls; 21600 polls at the default poll_interval of 1.0 second is about 6 hours of waiting.
+MAX_POLLS = 21600 # 6 hours
+def submit_backup_command(db, backup_command: str) -> tuple[bool, Optional[str]]:
+    """Send a backup statement to StarRocks through ``db.execute``.
+    Surrounding whitespace is stripped from the statement before it is executed.
+    Args:
+        db: Connection object exposing ``execute(sql)``.
+        backup_command: Backup SQL text to run.
+    Returns:
+        ``(True, None)`` when ``db.execute`` returned normally, otherwise
+        ``(False, message)`` where ``message`` carries the exception type and text.
+        On failure the message and the original command are also written to the error log.
+    """
+    try:
+        db.execute(backup_command.strip())
+    except Exception as exc:
+        failure = f"Failed to submit backup command: {type(exc).__name__}: {exc!s}"
+        logger.error(failure)
+        # Log the command as given (unstripped) so the failing statement can be inspected.
+        logger.error(f"backup_command: {backup_command}")
+        return False, failure
+    else:
+        return True, None
+def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS, poll_interval: float = 1.0) -> Dict[str, str]:
+    """Poll ``SHOW BACKUP`` until the tracked backup completes or polling gives up.
+    Only the first row returned by ``SHOW BACKUP FROM <database>`` is inspected, and its
+    snapshot name must equal ``label``. A mismatch on the first non-empty result is
+    tolerated once (one more poll is attempted); any later mismatch is reported as LOST,
+    because the row now shown belongs to a different backup and ours can no longer be tracked.
+    Rows may be dicts (keys ``SnapshotName`` / ``State``) or sequences (snapshot name at
+    index 1, state at index 3).
+    Args:
+        db: Database connection exposing ``query(sql)``
+        label: Expected snapshot name (label) to monitor
+        database: Database name where backup was submitted
+        max_polls: Maximum number of polling attempts
+        poll_interval: Seconds to wait between polls
+    Returns:
+        ``{"state": <state>, "label": label}``. Possible states:
+        FINISHED or CANCELLED (as reported by the row), LOST (snapshot name mismatch),
+        ERROR (an exception was raised while polling; it is logged), TIMEOUT
+        (``max_polls`` attempts were used without reaching a final state).
+    """
+    query = f"SHOW BACKUP FROM {database}"
+    first_poll = True
+    last_state = None
+    for poll_count in range(1, max_polls + 1):
+        try:
+            rows = db.query(query)
+            if not rows:
+                # Nothing reported yet; this does not consume the first-poll tolerance.
+                time.sleep(poll_interval)
+                continue
+            result = rows[0]
+            if isinstance(result, dict):
+                snapshot_name = result.get("SnapshotName", "")
+                state = result.get("State", "UNKNOWN")
+            else:
+                snapshot_name = result[1] if len(result) > 1 else ""
+                state = result[3] if len(result) > 3 else "UNKNOWN"
+            if snapshot_name != label:
+                if first_poll:
+                    # The row may still describe the previous backup; allow one retry.
+                    first_poll = False
+                    time.sleep(poll_interval)
+                    continue
+                return {"state": "LOST", "label": label}
+            first_poll = False
+            # Report every state change, plus a heartbeat on every 10th poll.
+            if state != last_state or poll_count % 10 == 0:
+                logger.progress(f"Backup status: {state} (poll {poll_count}/{max_polls})")
+                last_state = state
+            if state in ["FINISHED", "CANCELLED"]:
+                return {"state": state, "label": label}
+            time.sleep(poll_interval)
+        except Exception as exc:
+            # Record the cause; the ERROR state alone gives the operator nothing to act on.
+            logger.error(
+                f"Failed to poll backup status for '{label}' in database '{database}': "
+                f"{type(exc).__name__}: {exc}"
+            )
+            return {"state": "ERROR", "label": label}
+    return {"state": "TIMEOUT", "label": label}
+def execute_backup(
+    db,
+    backup_command: str,
+    max_polls: int = MAX_POLLS,
+    poll_interval: float = 1.0,
+    *,
+    repository: str,
+    backup_type: Optional[Literal['incremental', 'full']] = None,
+    scope: str = "backup",
+    database: Optional[str] = None,
+) -> Dict:
+    """Execute a complete backup workflow: submit command and monitor progress.
+    The label (and the database, when not supplied) is parsed out of ``backup_command``.
+    After polling ends, a history entry is written and the job slot is completed; both
+    steps are best effort and never change the reported backup result.
+    Args:
+        db: Database connection
+        backup_command: Backup SQL command to execute
+        max_polls: Maximum polling attempts
+        poll_interval: Seconds between polls
+        repository: Repository name (recorded in the history entry)
+        backup_type: Type of backup (recorded in the history entry)
+        scope: Job scope passed to ``concurrency.complete_job_slot``
+        database: Database name used for SHOW BACKUP; parsed from the command when empty
+    Returns:
+        Dictionary with keys ``success`` (bool), ``final_status`` (the polling result
+        dict, or None when submission failed) and ``error_message`` (None on success).
+    """
+    label = _extract_label_from_command(backup_command)
+    if not database:
+        database = _extract_database_from_command(backup_command)
+    # Naive local time, formatted the way the history entry expects it.
+    started_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    success, submit_error = submit_backup_command(db, backup_command)
+    if not success:
+        return {
+            "success": False,
+            "final_status": None,
+            "error_message": submit_error or "Failed to submit backup command (unknown error)"
+        }
+    try:
+        final_status = poll_backup_status(db, label, database, max_polls, poll_interval)
+        success = final_status["state"] == "FINISHED"
+        try:
+            history.log_backup(
+                db,
+                {
+                    "label": label,
+                    "backup_type": backup_type,
+                    "status": final_status["state"],
+                    "repository": repository,
+                    "started_at": started_at,
+                    "finished_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    "error_message": None if success else (final_status["state"] or ""),
+                },
+            )
+        except Exception:
+            # Best effort: history.log_backup logs its own failure before re-raising,
+            # and a history problem must not turn a finished backup into a failure.
+            pass
+        try:
+            concurrency.complete_job_slot(db, scope=scope, label=label, final_state=final_status["state"])
+        except Exception as exc:
+            # Best effort as well, but make the failure visible instead of dropping it.
+            # NOTE(review): an uncompleted slot presumably affects later runs - confirm.
+            logger.warning(
+                f"Could not complete job slot for '{label}' (scope '{scope}'): "
+                f"{type(exc).__name__}: {exc}"
+            )
+        return {
+            "success": success,
+            "final_status": final_status,
+            "error_message": None if success else _build_error_message(final_status, label, database, max_polls)
+        }
+    except Exception as e:
+        error_msg = f"Unexpected error during backup execution: {type(e).__name__}: {str(e)}"
+        logger.error(error_msg)
+        return {
+            "success": False,
+            "final_status": {"state": "ERROR", "label": label},
+            "error_message": error_msg
+        }
+def _build_error_message(final_status: Dict, label: str, database: str, max_polls: Optional[int] = None) -> str:
+    """Build a descriptive error message based on backup final status.
+    Args:
+        final_status: Polling result; its ``state`` key selects the message
+            (a missing key is treated as ``UNKNOWN``).
+        label: Snapshot label the message refers to.
+        database: Database name the message refers to.
+        max_polls: Poll limit that was actually used, reported in the TIMEOUT
+            message. Falls back to ``MAX_POLLS`` when omitted.
+    Returns:
+        Human-readable explanation for LOST, CANCELLED, TIMEOUT and ERROR, or a
+        generic "unexpected state" message for anything else.
+    """
+    polls_used = MAX_POLLS if max_polls is None else max_polls
+    state = final_status.get('state', 'UNKNOWN')
+    if state == "LOST":
+        return (
+            f"Backup tracking lost for '{label}' in database '{database}'. "
+            f"Another backup operation overwrote the last backup status visible in SHOW BACKUP. "
+            f"This indicates a concurrency issue - only one backup per database should run at a time. "
+            f"Recommendation: Use ops.run_status concurrency control to prevent simultaneous backups, "
+            f"or verify if another tool/user is running backups on this database."
+        )
+    elif state == "CANCELLED":
+        return (
+            f"Backup '{label}' was cancelled by StarRocks. "
+            f"Check StarRocks logs for the reason (common causes: insufficient resources, storage issues, or manual cancellation)."
+        )
+    elif state == "TIMEOUT":
+        return (
+            f"Backup '{label}' monitoring timed out after {polls_used} polls. "
+            f"The backup may still be running in the background. "
+            f"Check SHOW BACKUP FROM {database} manually to see current status."
+        )
+    elif state == "ERROR":
+        return (
+            f"Error occurred while monitoring backup '{label}' status. "
+            f"The backup may have been submitted but monitoring failed. "
+            f"Check SHOW BACKUP FROM {database} and StarRocks logs for details."
+        )
+    else:
+        return f"Backup '{label}' failed with unexpected state: {state}"
+def _extract_label_from_command(backup_command: str) -> str:
+    """Extract the snapshot label from a backup command.
+    This is a simple whitespace tokenizer for StarRocks backup commands, so the
+    statement may be spread over several lines and keywords may be in any case.
+    Handles both formats:
+    - BACKUP DATABASE db SNAPSHOT label TO repo
+    - BACKUP SNAPSHOT label TO repo (legacy)
+    Returns:
+        The token following SNAPSHOT, exactly as written in the command, or
+        ``"unknown_backup"`` when no label can be found.
+    """
+    tokens = backup_command.split()
+    keywords = [token.upper() for token in tokens]
+    for i, (keyword, following) in enumerate(zip(keywords, keywords[1:])):
+        if keyword != "BACKUP":
+            continue
+        if following == "DATABASE":
+            # Start after the database name (i + 2) so a database that happens
+            # to be called "snapshot" is not mistaken for the keyword.
+            try:
+                pos = keywords.index("SNAPSHOT", i + 3)
+            except ValueError:
+                continue
+            if pos + 1 < len(tokens):
+                return tokens[pos + 1]
+        elif following == "SNAPSHOT" and i + 2 < len(tokens):
+            # Legacy syntax
+            return tokens[i + 2]
+    return "unknown_backup"
+def _extract_database_from_command(backup_command: str) -> str:
+    """Extract the database name from a backup command.
+    Parses: BACKUP DATABASE db_name SNAPSHOT label ...
+    The command is split on any whitespace, so it may span several lines, and the
+    BACKUP / DATABASE keywords are matched case-insensitively.
+    Returns:
+        The token following ``BACKUP DATABASE`` exactly as written, or
+        ``"unknown_database"`` when the command does not contain one.
+    """
+    tokens = backup_command.split()
+    # Stop two short of the end: a match needs both DATABASE and a name after BACKUP.
+    for i, token in enumerate(tokens[:-2]):
+        if token.upper() == "BACKUP" and tokens[i + 1].upper() == "DATABASE":
+            return tokens[i + 2]
+    return "unknown_database"

starrocks_br/health.py ADDED Viewed

@@ -0,0 +1,34 @@
+from typing import Tuple
+def check_cluster_health(db) -> Tuple[bool, str]:
+    """Report whether every frontend and backend row looks alive.
+    Runs ``SHOW FRONTENDS`` and ``SHOW BACKENDS`` and reads fixed column positions:
+    indexes 9 and 10 of each frontend row and index 8 of each backend row. A row too
+    short to contain a column is treated as healthy for that column. Backend rows
+    are only examined when all frontend rows passed.
+    Returns:
+        ``(ok, message)`` - ``ok`` is False as soon as one inspected value is not
+        one of ALIVE / TRUE / YES / 1 (case-insensitive).
+    """
+    frontends = db.query("SHOW FRONTENDS")
+    backends = db.query("SHOW BACKENDS")
+    alive_markers = {"ALIVE", "TRUE", "YES", "1"}
+    def is_alive(value: str) -> bool:
+        return str(value).upper() in alive_markers
+    def column(row, index: int) -> str:
+        # Missing columns default to "TRUE" so short rows never count as dead.
+        return str(row[index]).upper() if len(row) > index else "TRUE"
+    def frontend_ok(row) -> bool:
+        joined_cluster = column(row, 9)
+        alive = column(row, 10)
+        return is_alive(joined_cluster) and is_alive(alive)
+    def backend_ok(row) -> bool:
+        return is_alive(column(row, 8))
+    healthy = all(frontend_ok(row) for row in frontends) and all(backend_ok(row) for row in backends)
+    if healthy:
+        return True, "Cluster healthy: all FE/BE are ALIVE and READY"
+    return False, "Cluster unhealthy: some FE/BE are DEAD or not READY"

starrocks_br/history.py ADDED Viewed

@@ -0,0 +1,93 @@
+from typing import Dict, Optional
+from . import logger
+def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
+    """Write a backup history entry to ops.backup_history.
+    Keys read from entry:
+      - label
+      - backup_type (incremental|full)
+      - status (FINISHED|FAILED|CANCELLED)
+      - repository
+      - started_at (YYYY-MM-DD HH:MM:SS)
+      - finished_at (YYYY-MM-DD HH:MM:SS)
+      - error_message (nullable)
+    Missing label/backup_type/status/repository are stored as empty strings; missing
+    or None started_at/finished_at/error_message are stored as SQL NULL. ``job_id``
+    is not written by this statement (presumably generated by the table - TODO confirm).
+    Raises:
+        Exception: whatever ``db.execute`` raised, after it has been logged.
+    """
+    def esc(val: Optional[str]) -> str:
+        # Render a value as a SQL literal: NULL for None, otherwise a quoted string.
+        if val is None:
+            return "NULL"
+        # Backslashes are doubled first because MySQL-style literals treat "\" as an
+        # escape character; otherwise a value ending in "\" would swallow the closing quote.
+        text = str(val).replace("\\", "\\\\").replace("'", "''")
+        return "'" + text + "'"
+    label = entry.get("label", "")
+    backup_type = entry.get("backup_type", "")
+    status = entry.get("status", "")
+    repository = entry.get("repository", "")
+    # Default to None (-> SQL NULL); a "NULL" string default would be quoted as 'NULL'.
+    started_at = entry.get("started_at")
+    finished_at = entry.get("finished_at")
+    error_message = entry.get("error_message")
+    sql = f"""
+    INSERT INTO ops.backup_history (
+        label, backup_type, status, repository, started_at, finished_at, error_message
+    ) VALUES (
+        {esc(label)}, {esc(backup_type)}, {esc(status)}, {esc(repository)},
+        {esc(started_at)}, {esc(finished_at)}, {esc(error_message)}
+    )
+    """
+    try:
+        db.execute(sql)
+    except Exception as e:
+        logger.error(f"Failed to log backup history: {str(e)}")
+        raise
+def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
+    """Write a restore history entry to ops.restore_history.
+    Keys read from entry:
+      - job_id
+      - backup_label
+      - restore_type (partition|table|database)
+      - status (FINISHED|FAILED|CANCELLED)
+      - repository
+      - started_at (YYYY-MM-DD HH:MM:SS)
+      - finished_at (YYYY-MM-DD HH:MM:SS)
+      - error_message (nullable)
+      - verification_checksum (optional)
+    Missing job_id/backup_label/restore_type/status/repository are stored as empty
+    strings; missing or None values for the remaining keys are stored as SQL NULL.
+    Raises:
+        Exception: whatever ``db.execute`` raised, after it has been logged.
+    """
+    def esc(val: Optional[str]) -> str:
+        # Render a value as a SQL literal: NULL for None, otherwise a quoted string.
+        if val is None:
+            return "NULL"
+        # Backslashes are doubled first because MySQL-style literals treat "\" as an
+        # escape character; otherwise a value ending in "\" would swallow the closing quote.
+        text = str(val).replace("\\", "\\\\").replace("'", "''")
+        return "'" + text + "'"
+    job_id = entry.get("job_id", "")
+    backup_label = entry.get("backup_label", "")
+    restore_type = entry.get("restore_type", "")
+    status = entry.get("status", "")
+    repository = entry.get("repository", "")
+    # Default to None (-> SQL NULL); a "NULL" string default would be quoted as 'NULL'.
+    started_at = entry.get("started_at")
+    finished_at = entry.get("finished_at")
+    error_message = entry.get("error_message")
+    verification_checksum = entry.get("verification_checksum")
+    sql = f"""
+    INSERT INTO ops.restore_history (
+        job_id, backup_label, restore_type, status, repository,
+        started_at, finished_at, error_message, verification_checksum
+    ) VALUES (
+        {esc(job_id)}, {esc(backup_label)}, {esc(restore_type)}, {esc(status)},
+        {esc(repository)}, {esc(started_at)}, {esc(finished_at)},
+        {esc(error_message)}, {esc(verification_checksum)}
+    )
+    """
+    try:
+        db.execute(sql)
+    except Exception as e:
+        logger.error(f"Failed to log restore history: {str(e)}")
+        raise

starrocks_br/labels.py ADDED Viewed

@@ -0,0 +1,52 @@
+from typing import Optional, Literal
+from datetime import datetime
+def determine_backup_label(db, backup_type: Literal['incremental', 'full'], database_name: str, custom_name: Optional[str] = None) -> str:
+    """Pick a backup label that is not yet present in ops.backup_history.
+    The base label is ``custom_name`` when given, otherwise
+    ``<database_name>_<YYYYMMDD>_<backup_type>`` using today's local date. Labels
+    starting with the base are looked up in ops.backup_history; when the base is
+    already taken, ``_r1``, ``_r2``, ... is appended until an unused label is found.
+    If the lookup raises, no labels are considered taken and the base is returned.
+    Args:
+        db: Database connection exposing ``query(sql, params)``
+        backup_type: Type of backup (incremental, full)
+        database_name: Name of the database being backed up
+        custom_name: Optional base label overriding the date-based one
+    Returns:
+        The first candidate label not found among the existing labels
+    """
+    base_label = custom_name if custom_name else f"{database_name}_{datetime.now().strftime('%Y%m%d')}_{backup_type}"
+    query = """
+    SELECT label
+    FROM ops.backup_history
+    WHERE label LIKE %s
+    ORDER BY label
+    """
+    try:
+        rows = db.query(query, (f"{base_label}%",))
+        taken = [row[0] for row in rows] if rows else []
+    except Exception:
+        taken = []
+    # Try the bare base first, then base_r1, base_r2, ... until one is free.
+    candidate = base_label
+    attempt = 0
+    while candidate in taken:
+        attempt += 1
+        candidate = f"{base_label}_r{attempt}"
+    return candidate

starrocks_br/logger.py ADDED Viewed

@@ -0,0 +1,36 @@
+import click
+def info(message: str) -> None:
+    """Echo ``message`` unchanged through ``click.echo``."""
+    click.echo(message=message)
+def success(message: str) -> None:
+    """Echo ``message`` prefixed with a checkmark."""
+    line = f"✓ {message}"
+    click.echo(line)
+def warning(message: str) -> None:
+    """Echo ``message`` prefixed with a warning symbol, using ``err=True``."""
+    line = f"⚠ {message}"
+    click.echo(line, err=True)
+def error(message: str) -> None:
+    """Echo ``message`` prefixed with ``Error:``, using ``err=True``."""
+    line = f"Error: {message}"
+    click.echo(line, err=True)
+def critical(message: str) -> None:
+    """Echo ``message`` prefixed with a cross mark and ``CRITICAL:``, using ``err=True``."""
+    line = f"❌ CRITICAL: {message}"
+    click.echo(line, err=True)
+def progress(message: str) -> None:
+    """Echo ``message`` prefixed with an hourglass symbol."""
+    line = f"⏳ {message}"
+    click.echo(line)
+def tip(message: str) -> None:
+    """Echo ``message`` prefixed with a lightbulb symbol, using ``err=True``."""
+    line = f"💡 {message}"
+    click.echo(line, err=True)