starrocks-br 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starrocks_br/restore.py CHANGED
@@ -3,7 +3,7 @@ import datetime
3
3
  from typing import Dict, List, Optional
4
4
  from . import history, concurrency, logger
5
5
 
6
- MAX_POLLS = 21600 # 6 hours
6
+ MAX_POLLS = 86400 # 1 day
7
7
 
8
8
  def get_snapshot_timestamp(db, repo_name: str, snapshot_name: str) -> str:
9
9
  """Get the backup timestamp for a specific snapshot from the repository.
@@ -281,17 +281,30 @@ def find_restore_pair(db, target_label: str) -> List[str]:
281
281
  raise ValueError(f"Unknown backup type '{target_info['backup_type']}' for label '{target_label}'")
282
282
 
283
283
 
284
- def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[str]:
284
+ def get_tables_from_backup(db, label: str, group: Optional[str] = None, table: Optional[str] = None, database: Optional[str] = None) -> List[str]:
285
285
  """Get list of tables to restore from backup manifest.
286
286
 
287
287
  Args:
288
288
  db: Database connection
289
289
  label: Backup label
290
290
  group: Optional inventory group to filter tables
291
+ table: Optional table name to filter (single table, database comes from database parameter)
292
+ database: Database name (required if table is specified)
291
293
 
292
294
  Returns:
293
- List of table names to restore
295
+ List of table names to restore (format: database.table)
296
+
297
+ Raises:
298
+ ValueError: If both group and table are specified
299
+ ValueError: If table is specified but database is not provided
300
+ ValueError: If table is specified but not found in backup
294
301
  """
302
+ if group and table:
303
+ raise ValueError("Cannot specify both --group and --table. Use --table for single table restore or --group for inventory group restore.")
304
+
305
+ if table and not database:
306
+ raise ValueError("database parameter is required when table is specified")
307
+
295
308
  query = f"""
296
309
  SELECT DISTINCT database_name, table_name
297
310
  FROM ops.backup_partitions
@@ -305,6 +318,15 @@ def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[
305
318
 
306
319
  tables = [f"{row[0]}.{row[1]}" for row in rows]
307
320
 
321
+ if table:
322
+ target_table = f"{database}.{table}"
323
+ filtered_tables = [t for t in tables if t == target_table]
324
+
325
+ if not filtered_tables:
326
+ raise ValueError(f"Table '{table}' not found in backup '{label}' for database '{database}'")
327
+
328
+ return filtered_tables
329
+
308
330
  if group:
309
331
  group_query = f"""
310
332
  SELECT database_name, table_name
@@ -316,14 +338,26 @@ def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[
316
338
  if not group_rows:
317
339
  return []
318
340
 
319
- group_tables = {f"{row[0]}.{row[1]}" for row in group_rows}
341
+ group_tables = set()
342
+ for row in group_rows:
343
+ database_name, table_name = row[0], row[1]
344
+ if table_name == '*':
345
+ show_tables_query = f"SHOW TABLES FROM {database_name}"
346
+ try:
347
+ tables_rows = db.query(show_tables_query)
348
+ for table_row in tables_rows:
349
+ group_tables.add(f"{database_name}.{table_row[0]}")
350
+ except Exception:
351
+ continue
352
+ else:
353
+ group_tables.add(f"{database_name}.{table_name}")
320
354
 
321
355
  tables = [table for table in tables if table in group_tables]
322
356
 
323
357
  return tables
324
358
 
325
359
 
326
- def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_restore: List[str], rename_suffix: str = "_restored") -> Dict:
360
+ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_restore: List[str], rename_suffix: str = "_restored", skip_confirmation: bool = False) -> Dict:
327
361
  """Execute the complete restore flow with safety measures.
328
362
 
329
363
  Args:
@@ -332,6 +366,7 @@ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_
332
366
  restore_pair: List of backup labels in restore order
333
367
  tables_to_restore: List of tables to restore (format: database.table)
334
368
  rename_suffix: Suffix for temporary tables
369
+ skip_confirmation: If True, skip interactive confirmation prompt
335
370
 
336
371
  Returns:
337
372
  Dictionary with success status and details
@@ -358,12 +393,15 @@ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_
358
393
  logger.info("This will restore data to temporary tables and then perform atomic rename.")
359
394
  logger.warning("WARNING: This operation will replace existing tables!")
360
395
 
361
- confirmation = input("\nDo you want to proceed? [Y/n]: ").strip()
362
- if confirmation.lower() != 'y':
363
- return {
364
- "success": False,
365
- "error_message": "Restore operation cancelled by user"
366
- }
396
+ if not skip_confirmation:
397
+ confirmation = input("\nDo you want to proceed? [Y/n]: ").strip()
398
+ if confirmation.lower() != 'y':
399
+ return {
400
+ "success": False,
401
+ "error_message": "Restore operation cancelled by user"
402
+ }
403
+ else:
404
+ logger.info("Proceeding automatically (--yes flag provided)")
367
405
 
368
406
  try:
369
407
  database_name = tables_to_restore[0].split('.')[0]
@@ -470,6 +508,19 @@ def _build_restore_command_without_rename(backup_label: str, repo_name: str, tab
470
508
  PROPERTIES ("backup_timestamp" = "{backup_timestamp}")"""
471
509
 
472
510
 
511
def _generate_timestamped_backup_name(table_name: str) -> str:
    """Build the name under which a live table is parked before being replaced.

    The current local wall-clock time (second resolution) is appended so that
    repeated restores of the same table produce distinct backup names.

    Args:
        table_name: Name of the table being replaced (without database prefix).

    Returns:
        A name of the form ``{table_name}_backup_YYYYMMDD_HHMMSS``.
    """
    # datetime's format-spec support is equivalent to calling strftime().
    return f"{table_name}_backup_{datetime.datetime.now():%Y%m%d_%H%M%S}"
522
+
523
+
473
524
  def _perform_atomic_rename(db, tables: List[str], rename_suffix: str) -> Dict:
474
525
  """Perform atomic rename of temporary tables to make them live."""
475
526
  try:
@@ -477,8 +528,9 @@ def _perform_atomic_rename(db, tables: List[str], rename_suffix: str) -> Dict:
477
528
  for table in tables:
478
529
  database, table_name = table.split('.', 1)
479
530
  temp_table_name = f"{table_name}{rename_suffix}"
531
+ backup_table_name = _generate_timestamped_backup_name(table_name)
480
532
 
481
- rename_statements.append(f"ALTER TABLE {database}.{table_name} RENAME {table_name}_backup")
533
+ rename_statements.append(f"ALTER TABLE {database}.{table_name} RENAME {backup_table_name}")
482
534
  rename_statements.append(f"ALTER TABLE {database}.{temp_table_name} RENAME {table_name}")
483
535
 
484
536
  for statement in rename_statements:
starrocks_br/schema.py CHANGED
@@ -71,8 +71,9 @@ def get_table_inventory_schema() -> str:
71
71
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
72
72
  updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
73
73
  )
74
- PRIMARY KEY (inventory_group, database_name, table_name)
74
+ UNIQUE KEY (inventory_group, database_name, table_name)
75
75
  COMMENT "Inventory groups mapping to databases/tables (supports '*' wildcard)"
76
+ DISTRIBUTED BY HASH(inventory_group)
76
77
  """
77
78
 
78
79
 
@@ -120,7 +121,7 @@ def get_run_status_schema() -> str:
120
121
  CREATE TABLE IF NOT EXISTS ops.run_status (
121
122
  scope STRING NOT NULL COMMENT "Job scope: backup or restore",
122
123
  label STRING NOT NULL COMMENT "Job label or identifier",
123
- state STRING NOT NULL DEFAULT "ACTIVE" COMMENT "Job state: ACTIVE or COMPLETED",
124
+ state STRING NOT NULL DEFAULT "ACTIVE" COMMENT "Job state: ACTIVE, FINISHED, FAILED, or CANCELLED",
124
125
  started_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT "Job start timestamp",
125
126
  finished_at DATETIME COMMENT "Job completion timestamp"
126
127
  )
@@ -133,12 +134,14 @@ def get_backup_partitions_schema() -> str:
133
134
  """Get CREATE TABLE statement for backup_partitions."""
134
135
  return """
135
136
  CREATE TABLE IF NOT EXISTS ops.backup_partitions (
137
+ key_hash STRING NOT NULL COMMENT "MD5 hash of composite key (label, database_name, table_name, partition_name)",
136
138
  label STRING NOT NULL COMMENT "The backup label this partition belongs to. FK to ops.backup_history.label.",
137
139
  database_name STRING NOT NULL COMMENT "The name of the database the partition belongs to.",
138
140
  table_name STRING NOT NULL COMMENT "The name of the table the partition belongs to.",
139
141
  partition_name STRING NOT NULL COMMENT "The name of the specific partition.",
140
142
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT "Timestamp when this record was created."
141
143
  )
142
- PRIMARY KEY (label, database_name, table_name, partition_name)
144
+ PRIMARY KEY (key_hash)
143
145
  COMMENT "Tracks every partition included in a backup snapshot."
146
+ DISTRIBUTED BY HASH(key_hash)
144
147
  """
@@ -0,0 +1,125 @@
1
+ import datetime
2
+ from typing import Union
3
+ from zoneinfo import ZoneInfo
4
+
5
+
6
def get_current_time_in_cluster_tz(cluster_tz: str) -> str:
    """Return the current wall-clock time as seen in the cluster's timezone.

    Args:
        cluster_tz: Timezone string understood by ``_get_timezone``
            (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        The current time rendered as 'YYYY-MM-DD HH:MM:SS' in that timezone.
        The string carries no timezone designator.
    """
    cluster_zone = _get_timezone(cluster_tz)
    return datetime.datetime.now(cluster_zone).strftime("%Y-%m-%d %H:%M:%S")
18
+
19
+
20
def parse_datetime_with_tz(dt_str: str, tz: str) -> datetime.datetime:
    """Parse a timestamp string and label it with the given timezone.

    The wall-clock fields are kept exactly as written; the timezone is
    attached, not converted to.

    Args:
        dt_str: Timestamp in 'YYYY-MM-DD HH:MM:SS' format.
        tz: Timezone string understood by ``_get_timezone``
            (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        Timezone-aware datetime whose tzinfo is the resolved timezone.

    Raises:
        ValueError: If ``dt_str`` does not match the expected format.
    """
    # Resolve the zone first, then parse, mirroring the original ordering.
    zone = _get_timezone(tz)
    naive = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
    return naive.replace(tzinfo=zone)
36
+
37
+
38
def normalize_datetime_to_tz(dt: datetime.datetime, target_tz: str) -> datetime.datetime:
    """Express a datetime in the target timezone.

    Args:
        dt: Datetime to convert. A naive value (no tzinfo) is interpreted
            as UTC before conversion.
        target_tz: Timezone string understood by ``_get_timezone``
            (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        Timezone-aware datetime representing the same instant in the
        target timezone.
    """
    zone = _get_timezone(target_tz)
    # Naive inputs carry no offset; treat them as UTC so astimezone() is well defined.
    aware = dt if dt.tzinfo is not None else dt.replace(tzinfo=datetime.timezone.utc)
    return aware.astimezone(zone)
56
+
57
+
58
def _get_timezone(tz_str: str) -> Union[ZoneInfo, datetime.timezone]:
    """Resolve a timezone string into a tzinfo object.

    Accepts named timezones (e.g., 'Asia/Shanghai') and fixed offsets
    (e.g., '+08:00', '-05:00'). Resolution is best-effort: any string that
    cannot be resolved yields UTC instead of raising.

    Args:
        tz_str: Timezone string; surrounding whitespace is ignored.

    Returns:
        ``ZoneInfo`` for named zones, ``datetime.timezone`` for fixed
        offsets, and a UTC tzinfo for 'UTC', zero offsets, or any
        unrecognized input.
    """
    tz_str = tz_str.strip()

    if tz_str.upper() == "UTC" or tz_str in ("+00:00", "-00:00"):
        return _utc_timezone()

    if tz_str.startswith(("+", "-")):
        try:
            hours, minutes = _parse_offset(tz_str)
            offset = datetime.timedelta(hours=hours, minutes=minutes)
            return datetime.timezone(offset)
        except ValueError:
            # Malformed offset: fall back to UTC rather than failing the caller.
            return _utc_timezone()

    try:
        return ZoneInfo(tz_str)
    except Exception:
        # Deliberate best-effort: unknown or malformed zone names map to UTC.
        return _utc_timezone()


def _utc_timezone() -> Union[ZoneInfo, datetime.timezone]:
    """Return a UTC tzinfo that is available on every platform.

    ``ZoneInfo("UTC")`` needs a tz database (system files or the ``tzdata``
    package). Where neither exists the lookup raises, so fall back to the
    built-in ``datetime.timezone.utc`` instead of letting the fallback crash.
    """
    try:
        return ZoneInfo("UTC")
    except (KeyError, ValueError, OSError):
        # ZoneInfoNotFoundError is a KeyError subclass.
        return datetime.timezone.utc
86
+
87
+
88
def _parse_offset(offset_str: str) -> tuple[int, int]:
    """Parse a fixed UTC offset string into signed hours and minutes.

    The input must be exactly '+HH:MM' or '-HH:MM'. Both returned components
    carry the sign, so they can be passed straight to
    ``datetime.timedelta(hours=..., minutes=...)``.

    Args:
        offset_str: Offset string in format '+HH:MM' or '-HH:MM'.

    Returns:
        Tuple of (hours, minutes), both negative for '-' offsets.

    Raises:
        ValueError: If the offset string is invalid, including:
            - Length other than 6 characters (trailing text is rejected)
            - First character is not '+' or '-'
            - Missing colon at position 3
            - Hour or minute fields that are not two ASCII digits
            - Hours >= 24
            - Minutes >= 60
    """
    # Exactly: sign, two hour digits, colon, two minute digits.
    if len(offset_str) != 6:
        raise ValueError(f"Invalid offset format: {offset_str}")

    if offset_str[3] != ':':
        raise ValueError(f"Invalid offset format: {offset_str} (missing colon)")

    sign_char = offset_str[0]
    if sign_char not in ('+', '-'):
        # Previously any non-'+' character was silently treated as '-'.
        raise ValueError(f"Invalid offset format: {offset_str} (must start with '+' or '-')")

    hours_text = offset_str[1:3]
    minutes_text = offset_str[4:6]
    # int() would accept embedded signs and whitespace (e.g. '+8', ' 8'),
    # so require plain ASCII digits before converting.
    for field in (hours_text, minutes_text):
        if not (field.isascii() and field.isdigit()):
            raise ValueError(f"Invalid offset format: {offset_str} (non-numeric values)")

    hours = int(hours_text)
    minutes = int(minutes_text)

    if hours >= 24:
        raise ValueError(f"Invalid offset format: {offset_str} (hours must be 00-23)")

    if minutes >= 60:
        raise ValueError(f"Invalid offset format: {offset_str} (minutes must be 00-59)")

    sign = 1 if sign_char == '+' else -1
    return sign * hours, sign * minutes
125
+