starrocks-br 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/cli.py +104 -8
- starrocks_br/config.py +26 -3
- starrocks_br/db.py +75 -9
- starrocks_br/executor.py +62 -12
- starrocks_br/planner.py +41 -17
- starrocks_br/restore.py +64 -12
- starrocks_br/schema.py +6 -3
- starrocks_br/timezone.py +125 -0
- starrocks_br-0.3.0.dist-info/METADATA +456 -0
- starrocks_br-0.3.0.dist-info/RECORD +20 -0
- starrocks_br-0.1.0.dist-info/METADATA +0 -12
- starrocks_br-0.1.0.dist-info/RECORD +0 -19
- {starrocks_br-0.1.0.dist-info → starrocks_br-0.3.0.dist-info}/WHEEL +0 -0
- {starrocks_br-0.1.0.dist-info → starrocks_br-0.3.0.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.1.0.dist-info → starrocks_br-0.3.0.dist-info}/top_level.txt +0 -0
starrocks_br/restore.py
CHANGED
|
@@ -3,7 +3,7 @@ import datetime
|
|
|
3
3
|
from typing import Dict, List, Optional
|
|
4
4
|
from . import history, concurrency, logger
|
|
5
5
|
|
|
6
|
-
MAX_POLLS =
|
|
6
|
+
MAX_POLLS = 86400 # 1 day
|
|
7
7
|
|
|
8
8
|
def get_snapshot_timestamp(db, repo_name: str, snapshot_name: str) -> str:
|
|
9
9
|
"""Get the backup timestamp for a specific snapshot from the repository.
|
|
@@ -281,17 +281,30 @@ def find_restore_pair(db, target_label: str) -> List[str]:
|
|
|
281
281
|
raise ValueError(f"Unknown backup type '{target_info['backup_type']}' for label '{target_label}'")
|
|
282
282
|
|
|
283
283
|
|
|
284
|
-
def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[str]:
|
|
284
|
+
def get_tables_from_backup(db, label: str, group: Optional[str] = None, table: Optional[str] = None, database: Optional[str] = None) -> List[str]:
|
|
285
285
|
"""Get list of tables to restore from backup manifest.
|
|
286
286
|
|
|
287
287
|
Args:
|
|
288
288
|
db: Database connection
|
|
289
289
|
label: Backup label
|
|
290
290
|
group: Optional inventory group to filter tables
|
|
291
|
+
table: Optional table name to filter (single table, database comes from database parameter)
|
|
292
|
+
database: Database name (required if table is specified)
|
|
291
293
|
|
|
292
294
|
Returns:
|
|
293
|
-
List of table names to restore
|
|
295
|
+
List of table names to restore (format: database.table)
|
|
296
|
+
|
|
297
|
+
Raises:
|
|
298
|
+
ValueError: If both group and table are specified
|
|
299
|
+
ValueError: If table is specified but database is not provided
|
|
300
|
+
ValueError: If table is specified but not found in backup
|
|
294
301
|
"""
|
|
302
|
+
if group and table:
|
|
303
|
+
raise ValueError("Cannot specify both --group and --table. Use --table for single table restore or --group for inventory group restore.")
|
|
304
|
+
|
|
305
|
+
if table and not database:
|
|
306
|
+
raise ValueError("database parameter is required when table is specified")
|
|
307
|
+
|
|
295
308
|
query = f"""
|
|
296
309
|
SELECT DISTINCT database_name, table_name
|
|
297
310
|
FROM ops.backup_partitions
|
|
@@ -305,6 +318,15 @@ def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[
|
|
|
305
318
|
|
|
306
319
|
tables = [f"{row[0]}.{row[1]}" for row in rows]
|
|
307
320
|
|
|
321
|
+
if table:
|
|
322
|
+
target_table = f"{database}.{table}"
|
|
323
|
+
filtered_tables = [t for t in tables if t == target_table]
|
|
324
|
+
|
|
325
|
+
if not filtered_tables:
|
|
326
|
+
raise ValueError(f"Table '{table}' not found in backup '{label}' for database '{database}'")
|
|
327
|
+
|
|
328
|
+
return filtered_tables
|
|
329
|
+
|
|
308
330
|
if group:
|
|
309
331
|
group_query = f"""
|
|
310
332
|
SELECT database_name, table_name
|
|
@@ -316,14 +338,26 @@ def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[
|
|
|
316
338
|
if not group_rows:
|
|
317
339
|
return []
|
|
318
340
|
|
|
319
|
-
group_tables =
|
|
341
|
+
group_tables = set()
|
|
342
|
+
for row in group_rows:
|
|
343
|
+
database_name, table_name = row[0], row[1]
|
|
344
|
+
if table_name == '*':
|
|
345
|
+
show_tables_query = f"SHOW TABLES FROM {database_name}"
|
|
346
|
+
try:
|
|
347
|
+
tables_rows = db.query(show_tables_query)
|
|
348
|
+
for table_row in tables_rows:
|
|
349
|
+
group_tables.add(f"{database_name}.{table_row[0]}")
|
|
350
|
+
except Exception:
|
|
351
|
+
continue
|
|
352
|
+
else:
|
|
353
|
+
group_tables.add(f"{database_name}.{table_name}")
|
|
320
354
|
|
|
321
355
|
tables = [table for table in tables if table in group_tables]
|
|
322
356
|
|
|
323
357
|
return tables
|
|
324
358
|
|
|
325
359
|
|
|
326
|
-
def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_restore: List[str], rename_suffix: str = "_restored") -> Dict:
|
|
360
|
+
def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_restore: List[str], rename_suffix: str = "_restored", skip_confirmation: bool = False) -> Dict:
|
|
327
361
|
"""Execute the complete restore flow with safety measures.
|
|
328
362
|
|
|
329
363
|
Args:
|
|
@@ -332,6 +366,7 @@ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_
|
|
|
332
366
|
restore_pair: List of backup labels in restore order
|
|
333
367
|
tables_to_restore: List of tables to restore (format: database.table)
|
|
334
368
|
rename_suffix: Suffix for temporary tables
|
|
369
|
+
skip_confirmation: If True, skip interactive confirmation prompt
|
|
335
370
|
|
|
336
371
|
Returns:
|
|
337
372
|
Dictionary with success status and details
|
|
@@ -358,12 +393,15 @@ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_
|
|
|
358
393
|
logger.info("This will restore data to temporary tables and then perform atomic rename.")
|
|
359
394
|
logger.warning("WARNING: This operation will replace existing tables!")
|
|
360
395
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
396
|
+
if not skip_confirmation:
|
|
397
|
+
confirmation = input("\nDo you want to proceed? [Y/n]: ").strip()
|
|
398
|
+
if confirmation.lower() != 'y':
|
|
399
|
+
return {
|
|
400
|
+
"success": False,
|
|
401
|
+
"error_message": "Restore operation cancelled by user"
|
|
402
|
+
}
|
|
403
|
+
else:
|
|
404
|
+
logger.info("Proceeding automatically (--yes flag provided)")
|
|
367
405
|
|
|
368
406
|
try:
|
|
369
407
|
database_name = tables_to_restore[0].split('.')[0]
|
|
@@ -470,6 +508,19 @@ def _build_restore_command_without_rename(backup_label: str, repo_name: str, tab
|
|
|
470
508
|
PROPERTIES ("backup_timestamp" = "{backup_timestamp}")"""
|
|
471
509
|
|
|
472
510
|
|
|
511
|
+
def _generate_timestamped_backup_name(table_name: str, now: Optional[datetime.datetime] = None) -> str:
    """Generate a timestamped backup table name.

    Args:
        table_name: Original table name.
        now: Moment to stamp the name with. Defaults to the current local
            time. Passing an explicit value lets a caller give every table
            in one batch the same suffix and makes the result deterministic.

    Returns:
        Timestamped backup name in format: {table_name}_backup_YYYYMMDD_HHMMSS
    """
    # Compare against None explicitly so any datetime the caller passes is honoured.
    moment = datetime.datetime.now() if now is None else now
    return f"{table_name}_backup_{moment.strftime('%Y%m%d_%H%M%S')}"
|
|
522
|
+
|
|
523
|
+
|
|
473
524
|
def _perform_atomic_rename(db, tables: List[str], rename_suffix: str) -> Dict:
|
|
474
525
|
"""Perform atomic rename of temporary tables to make them live."""
|
|
475
526
|
try:
|
|
@@ -477,8 +528,9 @@ def _perform_atomic_rename(db, tables: List[str], rename_suffix: str) -> Dict:
|
|
|
477
528
|
for table in tables:
|
|
478
529
|
database, table_name = table.split('.', 1)
|
|
479
530
|
temp_table_name = f"{table_name}{rename_suffix}"
|
|
531
|
+
backup_table_name = _generate_timestamped_backup_name(table_name)
|
|
480
532
|
|
|
481
|
-
rename_statements.append(f"ALTER TABLE {database}.{table_name} RENAME {
|
|
533
|
+
rename_statements.append(f"ALTER TABLE {database}.{table_name} RENAME {backup_table_name}")
|
|
482
534
|
rename_statements.append(f"ALTER TABLE {database}.{temp_table_name} RENAME {table_name}")
|
|
483
535
|
|
|
484
536
|
for statement in rename_statements:
|
starrocks_br/schema.py
CHANGED
|
@@ -71,8 +71,9 @@ def get_table_inventory_schema() -> str:
|
|
|
71
71
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
72
72
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
73
73
|
)
|
|
74
|
-
|
|
74
|
+
UNIQUE KEY (inventory_group, database_name, table_name)
|
|
75
75
|
COMMENT "Inventory groups mapping to databases/tables (supports '*' wildcard)"
|
|
76
|
+
DISTRIBUTED BY HASH(inventory_group)
|
|
76
77
|
"""
|
|
77
78
|
|
|
78
79
|
|
|
@@ -120,7 +121,7 @@ def get_run_status_schema() -> str:
|
|
|
120
121
|
CREATE TABLE IF NOT EXISTS ops.run_status (
|
|
121
122
|
scope STRING NOT NULL COMMENT "Job scope: backup or restore",
|
|
122
123
|
label STRING NOT NULL COMMENT "Job label or identifier",
|
|
123
|
-
state STRING NOT NULL DEFAULT "ACTIVE" COMMENT "Job state: ACTIVE or
|
|
124
|
+
state STRING NOT NULL DEFAULT "ACTIVE" COMMENT "Job state: ACTIVE, FINISHED, FAILED, or CANCELLED",
|
|
124
125
|
started_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT "Job start timestamp",
|
|
125
126
|
finished_at DATETIME COMMENT "Job completion timestamp"
|
|
126
127
|
)
|
|
@@ -133,12 +134,14 @@ def get_backup_partitions_schema() -> str:
|
|
|
133
134
|
"""Get CREATE TABLE statement for backup_partitions."""
|
|
134
135
|
return """
|
|
135
136
|
CREATE TABLE IF NOT EXISTS ops.backup_partitions (
|
|
137
|
+
key_hash STRING NOT NULL COMMENT "MD5 hash of composite key (label, database_name, table_name, partition_name)",
|
|
136
138
|
label STRING NOT NULL COMMENT "The backup label this partition belongs to. FK to ops.backup_history.label.",
|
|
137
139
|
database_name STRING NOT NULL COMMENT "The name of the database the partition belongs to.",
|
|
138
140
|
table_name STRING NOT NULL COMMENT "The name of the table the partition belongs to.",
|
|
139
141
|
partition_name STRING NOT NULL COMMENT "The name of the specific partition.",
|
|
140
142
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT "Timestamp when this record was created."
|
|
141
143
|
)
|
|
142
|
-
PRIMARY KEY (
|
|
144
|
+
PRIMARY KEY (key_hash)
|
|
143
145
|
COMMENT "Tracks every partition included in a backup snapshot."
|
|
146
|
+
DISTRIBUTED BY HASH(key_hash)
|
|
144
147
|
"""
|
starrocks_br/timezone.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Union
|
|
3
|
+
from zoneinfo import ZoneInfo
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_current_time_in_cluster_tz(cluster_tz: str) -> str:
    """Return the current wall-clock time rendered in the cluster timezone.

    Args:
        cluster_tz: Timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00').
            It is resolved by ``_get_timezone``.

    Returns:
        The current moment formatted as 'YYYY-MM-DD HH:MM:SS' in the
        resolved timezone (no offset suffix is included in the string).
    """
    cluster_zone = _get_timezone(cluster_tz)
    return datetime.datetime.now(cluster_zone).strftime("%Y-%m-%d %H:%M:%S")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_datetime_with_tz(dt_str: str, tz: str) -> datetime.datetime:
    """Parse a datetime string and label it with the given timezone.

    The parsed wall-clock value is not shifted; the timezone is simply
    attached to it.

    Args:
        dt_str: Datetime string in 'YYYY-MM-DD HH:MM:SS' format.
        tz: Timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        Timezone-aware datetime object.

    Raises:
        ValueError: If ``dt_str`` does not match the expected format.
    """
    # Resolve the zone first, matching the original evaluation order.
    zone = _get_timezone(tz)
    naive_value = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
    return naive_value.replace(tzinfo=zone)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def normalize_datetime_to_tz(dt: datetime.datetime, target_tz: str) -> datetime.datetime:
    """Convert a datetime to the target timezone.

    Args:
        dt: Datetime object. A naive value (no tzinfo) is interpreted as UTC
            before conversion.
        target_tz: Target timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        Timezone-aware datetime object representing the same instant in the
        target timezone.
    """
    target_zone = _get_timezone(target_tz)
    # Naive input carries no zone of its own; treat it as UTC.
    aware_value = dt if dt.tzinfo is not None else dt.replace(tzinfo=datetime.timezone.utc)
    return aware_value.astimezone(target_zone)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _get_timezone(tz_str: str) -> Union[ZoneInfo, datetime.timezone]:
    """Get timezone object from timezone string.

    Handles both named timezones (e.g., 'Asia/Shanghai') and offset strings
    (e.g., '+08:00', '-05:00'). Surrounding whitespace is ignored.

    A string that cannot be resolved does not raise: the function falls back
    to UTC, and logs a warning so that a misconfigured timezone is visible
    instead of silently shifting every timestamp.

    Args:
        tz_str: Timezone string

    Returns:
        ZoneInfo for 'UTC', zero offsets, named zones and every fallback case;
        a fixed-offset ``datetime.timezone`` for valid non-zero offsets.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)
    tz_str = tz_str.strip()

    if tz_str.upper() == "UTC" or tz_str in ("+00:00", "-00:00"):
        return ZoneInfo("UTC")

    if tz_str.startswith(("+", "-")):
        try:
            hours, minutes = _parse_offset(tz_str)
            offset = datetime.timedelta(hours=hours, minutes=minutes)
            return datetime.timezone(offset)
        except ValueError as exc:
            log.warning("Invalid timezone offset %r (%s); falling back to UTC", tz_str, exc)
            return ZoneInfo("UTC")

    try:
        return ZoneInfo(tz_str)
    except Exception as exc:
        # Deliberately broad: ZoneInfo can raise ZoneInfoNotFoundError,
        # ValueError or an OS-level error depending on the key.
        log.warning("Unknown timezone %r (%s); falling back to UTC", tz_str, exc)
        return ZoneInfo("UTC")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _parse_offset(offset_str: str) -> tuple[int, int]:
    """Parse timezone offset string to hours and minutes.

    The input must be exactly six characters: a sign, two ASCII hour digits,
    a colon, and two ASCII minute digits.

    Args:
        offset_str: Offset string in format '+HH:MM' or '-HH:MM'

    Returns:
        Tuple of (hours, minutes). Both components carry the sign of the
        offset, e.g. '-05:30' yields (-5, -30), so they can be passed
        directly to ``datetime.timedelta``.

    Raises:
        ValueError: If offset string is invalid, including:
            - String length is not exactly 6 characters
            - First character is not '+' or '-'
            - Missing colon at position 3
            - Hour or minute fields are not two ASCII digits
            - Hours >= 24
            - Minutes >= 60
    """
    # Exact length check: trailing characters after '+HH:MM' are rejected too.
    if len(offset_str) != 6:
        raise ValueError(f"Invalid offset format: {offset_str}")

    sign_char = offset_str[0]
    hour_part = offset_str[1:3]
    separator = offset_str[3]
    minute_part = offset_str[4:6]

    if sign_char not in ("+", "-"):
        raise ValueError(f"Invalid offset format: {offset_str} (must start with '+' or '-')")

    if separator != ':':
        raise ValueError(f"Invalid offset format: {offset_str} (missing colon)")

    # int() alone would accept forms such as '+8' or ' 8'; require plain digits.
    fields_are_digits = all(part.isascii() and part.isdigit() for part in (hour_part, minute_part))
    if not fields_are_digits:
        raise ValueError(f"Invalid offset format: {offset_str} (non-numeric values)")

    hours = int(hour_part)
    minutes = int(minute_part)

    if hours >= 24:
        raise ValueError(f"Invalid offset format: {offset_str} (hours must be 00-23)")

    if minutes >= 60:
        raise ValueError(f"Invalid offset format: {offset_str} (minutes must be 00-59)")

    sign = 1 if sign_char == "+" else -1
    return sign * hours, sign * minutes
|
|
125
|
+
|