PyPI - starrocks-br - Versions diffs - 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

starrocks-br 0.2.0py3-none-any.whl → 0.4.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

starrocks_br/cli.py +257 -193
starrocks_br/concurrency.py +50 -50
starrocks_br/config.py +31 -23
starrocks_br/db.py +37 -37
starrocks_br/executor.py +100 -71
starrocks_br/health.py +1 -6
starrocks_br/history.py +5 -6
starrocks_br/labels.py +14 -10
starrocks_br/planner.py +119 -113
starrocks_br/repository.py +3 -5
starrocks_br/restore.py +240 -187
starrocks_br/schema.py +20 -16
starrocks_br/timezone.py +28 -29
starrocks_br/utils.py +86 -0
starrocks_br-0.4.0.dist-info/METADATA +152 -0
starrocks_br-0.4.0.dist-info/RECORD +21 -0
starrocks_br-0.2.0.dist-info/METADATA +0 -12
starrocks_br-0.2.0.dist-info/RECORD +0 -20
{starrocks_br-0.2.0.dist-info → starrocks_br-0.4.0.dist-info}/WHEEL +0 -0
{starrocks_br-0.2.0.dist-info → starrocks_br-0.4.0.dist-info}/entry_points.txt +0 -0
{starrocks_br-0.2.0.dist-info → starrocks_br-0.4.0.dist-info}/top_level.txt +0 -0

starrocks_br/executor.py CHANGED Viewed

@@ -1,14 +1,30 @@
-import time
-import datetime
 import re
-from typing import Dict, Literal, Optional, Tuple
-from . import history, concurrency, logger, timezone
+import time
+from typing import Literal, Optional
+from . import concurrency, history, logger, timezone
+MAX_POLLS = 86400  # 1 day
+def _calculate_next_interval(current_interval: float, max_interval: float) -> float:
+    """Calculate the next polling interval using exponential backoff.
+    Args:
+        current_interval: Current polling interval in seconds
+        max_interval: Maximum allowed interval in seconds
+    Returns:
+        Next interval (min of doubled current interval and max_interval)
+    """
+    return min(current_interval * 2, max_interval)
-MAX_POLLS = 86400 # 1 day
-def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str], Optional[Dict[str, str]]]:
+def submit_backup_command(
+    db, backup_command: str
+) -> tuple[bool, Optional[str], Optional[dict[str, str]]]:
     """Submit a backup command to StarRocks.
     Returns (success, error_message, error_details).
     error_details is a dict with keys like 'error_type' and 'snapshot_name' for specific error cases.
     """
@@ -18,19 +34,16 @@ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str],
     except Exception as e:
         error_str = str(e)
         error_type = type(e).__name__
         snapshot_exists_match = _check_snapshot_exists_error(e, error_str)
         if snapshot_exists_match:
             snapshot_name = snapshot_exists_match
-            error_details = {
-                'error_type': 'snapshot_exists',
-                'snapshot_name': snapshot_name
-            }
+            error_details = {"error_type": "snapshot_exists", "snapshot_name": snapshot_name}
             error_msg = f"Snapshot '{snapshot_name}' already exists in repository"
             logger.error(error_msg)
             logger.error(f"backup_command: {backup_command}")
             return False, error_msg, error_details
         error_msg = f"Failed to submit backup command: {error_type}: {error_str}"
         logger.error(error_msg)
         logger.error(f"backup_command: {backup_command}")
@@ -39,48 +52,56 @@ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str],
 def _check_snapshot_exists_error(exception: Exception, error_str: str) -> Optional[str]:
     """Check if the error is a 'snapshot already exists' error and extract snapshot name.
     Args:
         exception: The exception that was raised
         error_str: String representation of the error
     Returns:
         Snapshot name if this is a snapshot exists error, None otherwise
     """
     snapshot_name_pattern = r"Snapshot with name '([^']+)' already exist"
     error_lower = error_str.lower()
     is_snapshot_exists_error = (
-        "already exist" in error_lower or
-        "already exists" in error_lower or
-        ("5064" in error_str and "already exist" in error_lower) or
-        (hasattr(exception, 'errno') and exception.errno == 5064)
+        "already exist" in error_lower
+        or "already exists" in error_lower
+        or ("5064" in error_str and "already exist" in error_lower)
+        or (hasattr(exception, "errno") and exception.errno == 5064)
     )
     if is_snapshot_exists_error:
         match = re.search(snapshot_name_pattern, error_str, re.IGNORECASE)
         if match:
             return match.group(1)
     return None
-def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS, poll_interval: float = 1.0) -> Dict[str, str]:
+def poll_backup_status(
+    db,
+    label: str,
+    database: str,
+    max_polls: int = MAX_POLLS,
+    poll_interval: float = 1.0,
+    max_poll_interval: float = 60.0,
+) -> dict[str, str]:
     """Poll backup status until completion or timeout.
     Note: SHOW BACKUP only returns the LAST backup in a database.
     We verify that the SnapshotName matches our expected label.
     Important: If we see a different snapshot name, it means another backup
     operation overwrote ours and we've lost tracking (race condition).
     Args:
         db: Database connection
         label: Expected snapshot name (label) to monitor
         database: Database name where backup was submitted
         max_polls: Maximum number of polling attempts
-        poll_interval: Seconds to wait between polls
+        poll_interval: Initial seconds to wait between polls (exponentially increases)
+        max_poll_interval: Maximum interval between polls (default 60 seconds)
     Returns dictionary with keys: state, label
     Possible states: FINISHED, CANCELLED, TIMEOUT, ERROR, LOST
     """
@@ -88,47 +109,51 @@ def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS
     first_poll = True
     last_state = None
     poll_count = 0
+    current_interval = poll_interval
     for _ in range(max_polls):
         poll_count += 1
         try:
             rows = db.query(query)
             if not rows:
-                time.sleep(poll_interval)
+                time.sleep(current_interval)
+                current_interval = _calculate_next_interval(current_interval, max_poll_interval)
                 continue
             result = rows[0]
             if isinstance(result, dict):
                 snapshot_name = result.get("SnapshotName", "")
                 state = result.get("State", "UNKNOWN")
             else:
                 snapshot_name = result[1] if len(result) > 1 else ""
                 state = result[3] if len(result) > 3 else "UNKNOWN"
             if snapshot_name != label:
                 if first_poll:
                     first_poll = False
-                    time.sleep(poll_interval)
+                    time.sleep(current_interval)
+                    current_interval = _calculate_next_interval(current_interval, max_poll_interval)
                     continue
                 else:
                     return {"state": "LOST", "label": label}
             first_poll = False
             if state != last_state or poll_count % 10 == 0:
                 logger.progress(f"Backup status: {state} (poll {poll_count}/{max_polls})")
                 last_state = state
             if state in ["FINISHED", "CANCELLED"]:
                 return {"state": state, "label": label}
-            time.sleep(poll_interval)
+            time.sleep(current_interval)
+            current_interval = _calculate_next_interval(current_interval, max_poll_interval)
         except Exception:
             return {"state": "ERROR", "label": label}
     return {"state": "TIMEOUT", "label": label}
@@ -139,12 +164,12 @@ def execute_backup(
     poll_interval: float = 1.0,
     *,
     repository: str,
-    backup_type: Literal['incremental', 'full'] = None,
+    backup_type: Literal["incremental", "full"] = None,
     scope: str = "backup",
     database: Optional[str] = None,
-) -> Dict:
+) -> dict:
     """Execute a complete backup workflow: submit command and monitor progress.
     Args:
         db: Database connection
         backup_command: Backup SQL command to execute
@@ -154,31 +179,31 @@ def execute_backup(
         backup_type: Type of backup (for logging)
         scope: Job scope (for concurrency control)
         database: Database name (required for SHOW BACKUP)
     Returns dictionary with keys: success, final_status, error_message
     """
     label = _extract_label_from_command(backup_command)
     if not database:
         database = _extract_database_from_command(backup_command)
     cluster_tz = db.timezone
     started_at = timezone.get_current_time_in_cluster_tz(cluster_tz)
     success, submit_error, error_details = submit_backup_command(db, backup_command)
     if not success:
         result = {
             "success": False,
             "final_status": None,
-            "error_message": submit_error or "Failed to submit backup command (unknown error)"
+            "error_message": submit_error or "Failed to submit backup command (unknown error)",
         }
         if error_details:
             result["error_details"] = error_details
         return result
     try:
         final_status = poll_backup_status(db, label, database, max_polls, poll_interval)
         success = final_status["state"] == "FINISHED"
         try:
@@ -199,30 +224,34 @@ def execute_backup(
             pass
         try:
-            concurrency.complete_job_slot(db, scope=scope, label=label, final_state=final_status["state"])
+            concurrency.complete_job_slot(
+                db, scope=scope, label=label, final_state=final_status["state"]
+            )
         except Exception:
             pass
         return {
             "success": success,
             "final_status": final_status,
-            "error_message": None if success else _build_error_message(final_status, label, database)
+            "error_message": None
+            if success
+            else _build_error_message(final_status, label, database),
         }
     except Exception as e:
         error_msg = f"Unexpected error during backup execution: {type(e).__name__}: {str(e)}"
         logger.error(error_msg)
         return {
             "success": False,
             "final_status": {"state": "ERROR", "label": label},
-            "error_message": error_msg
+            "error_message": error_msg,
         }
-def _build_error_message(final_status: Dict, label: str, database: str) -> str:
+def _build_error_message(final_status: dict, label: str, database: str) -> str:
     """Build a descriptive error message based on backup final status."""
-    state = final_status.get('state', 'UNKNOWN')
+    state = final_status.get("state", "UNKNOWN")
     if state == "LOST":
         return (
             f"Backup tracking lost for '{label}' in database '{database}'. "
@@ -254,42 +283,42 @@ def _build_error_message(final_status: Dict, label: str, database: str) -> str:
 def _extract_label_from_command(backup_command: str) -> str:
     """Extract the snapshot label from a backup command.
     This is a simple parser for StarRocks backup commands.
     Handles both formats:
     - BACKUP DATABASE db SNAPSHOT label TO repo
     - BACKUP SNAPSHOT label TO repo (legacy)
     """
-    lines = backup_command.strip().split('\n')
+    lines = backup_command.strip().split("\n")
     for line in lines:
         line = line.strip()
-        if line.startswith('BACKUP DATABASE'):
+        if line.startswith("BACKUP DATABASE"):
             parts = line.split()
             for i, part in enumerate(parts):
-                if part == 'SNAPSHOT' and i + 1 < len(parts):
+                if part == "SNAPSHOT" and i + 1 < len(parts):
                     return parts[i + 1]
-        elif line.startswith('BACKUP SNAPSHOT'):
+        elif line.startswith("BACKUP SNAPSHOT"):
             # Legacy syntax
             parts = line.split()
             if len(parts) >= 3:
                 return parts[2]
     return "unknown_backup"
 def _extract_database_from_command(backup_command: str) -> str:
     """Extract the database name from a backup command.
     Parses: BACKUP DATABASE db_name SNAPSHOT label ...
     """
-    lines = backup_command.strip().split('\n')
+    lines = backup_command.strip().split("\n")
     for line in lines:
         line = line.strip()
-        if line.startswith('BACKUP DATABASE'):
+        if line.startswith("BACKUP DATABASE"):
             parts = line.split()
             if len(parts) >= 3:
                 return parts[2]
     return "unknown_database"

starrocks_br/health.py CHANGED Viewed

@@ -1,7 +1,4 @@
-from typing import Tuple
-def check_cluster_health(db) -> Tuple[bool, str]:
+def check_cluster_health(db) -> tuple[bool, str]:
     """Check FE/BE health via SHOW FRONTENDS/BACKENDS.
     Returns (ok, message).
@@ -30,5 +27,3 @@ def check_cluster_health(db) -> Tuple[bool, str]:
     if any_dead:
         return False, "Cluster unhealthy: some FE/BE are DEAD or not READY"
     return True, "Cluster healthy: all FE/BE are ALIVE and READY"

starrocks_br/history.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from typing import Dict, Optional
+from typing import Optional
 from . import logger
-def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
+def log_backup(db, entry: dict[str, Optional[str]]) -> None:
     """Write a backup history entry to ops.backup_history.
     Expected keys in entry:
@@ -36,7 +37,7 @@ def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
         {esc(started_at)}, {esc(finished_at)}, {esc(error_message)}
     )
     """
     try:
         db.execute(sql)
     except Exception as e:
@@ -44,7 +45,7 @@ def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
         raise
-def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
+def log_restore(db, entry: dict[str, Optional[str]]) -> None:
     """Write a restore history entry to ops.restore_history.
     Expected keys in entry:
@@ -89,5 +90,3 @@ def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
     except Exception as e:
         logger.error(f"Failed to log restore history: {str(e)}")
         raise

starrocks_br/labels.py CHANGED Viewed

@@ -1,21 +1,26 @@
-from typing import Optional, Literal
 from datetime import datetime
+from typing import Literal, Optional
-def determine_backup_label(db, backup_type: Literal['incremental', 'full'], database_name: str, custom_name: Optional[str] = None) -> str:
+def determine_backup_label(
+    db,
+    backup_type: Literal["incremental", "full"],
+    database_name: str,
+    custom_name: Optional[str] = None,
+) -> str:
     """Determine a unique backup label for the given parameters.
     This is the single entry point for all backup label generation. It handles both
     custom names and auto-generated date-based labels, ensuring uniqueness by checking
     the ops.backup_history table.
     Args:
         db: Database connection
         backup_type: Type of backup (incremental, full)
         database_name: Name of the database being backed up
         custom_name: Optional custom name for the backup. If provided, this becomes
                     the base label. If None, generates a date-based label.
     Returns:
         Unique label string that doesn't conflict with existing backups
     """
@@ -24,26 +29,25 @@ def determine_backup_label(db, backup_type: Literal['incremental', 'full'], data
     else:
         today = datetime.now().strftime("%Y%m%d")
         base_label = f"{database_name}_{today}_{backup_type}"
     query = """
     SELECT label
     FROM ops.backup_history
     WHERE label LIKE %s
     ORDER BY label
     """
     pattern = f"{base_label}%"
     try:
         rows = db.query(query, (pattern,))
         existing_labels = [row[0] for row in rows] if rows else []
     except Exception:
         existing_labels = []
     if base_label not in existing_labels:
         return base_label
     retry_count = 1
     while True:
         candidate_label = f"{base_label}_r{retry_count}"

starrocks-br 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

starrocks-br 0.2.0py3-none-any.whl → 0.4.0py3-none-any.whl