starrocks-br 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starrocks_br/cli.py CHANGED
@@ -15,6 +15,40 @@ from . import schema
15
15
  from . import logger
16
16
 
17
17
 
18
def _handle_snapshot_exists_error(error_details: dict, label: str, config: str, repository: str, backup_type: str, group: str, baseline_backup: str = None) -> None:
    """Log guidance for the user after a backup failed with a 'snapshot_exists' error.

    Emits one error line, an explanation of the likely cause, a ready-to-copy
    retry command that appends ``_retry`` to the snapshot name, and a tip with
    a SHOW SNAPSHOT statement for inspecting the existing snapshot.
    Nothing is returned and nothing is raised here; the caller decides how to exit.

    Args:
        error_details: Error details dict; its 'snapshot_name' entry is used when present
        label: The generated backup label, used as the fallback snapshot name
        config: Path to config file (echoed into the suggested command)
        repository: Repository name (echoed into the SHOW SNAPSHOT tip)
        backup_type: 'incremental' selects the incremental command; any other value
            produces the 'full' command
        group: Inventory group name (echoed into the suggested command)
        baseline_backup: Optional baseline backup label; only appended for incremental backups
    """
    snapshot_name = error_details.get('snapshot_name', label)
    is_incremental = backup_type == 'incremental'

    logger.error(f"Snapshot '{snapshot_name}' already exists in the repository.")

    # Fixed explanatory text, emitted line by line in the same order as before.
    for line in (
        "",
        "This typically happens when:",
        " • The CLI lost connectivity during a previous backup operation",
        " • The backup completed on the server, but backup_history wasn't updated",
        "",
        "To resolve this, retry the backup with a custom label using --name:",
    ):
        logger.info(line)

    # Both backup flavours share the same command shape; only the subcommand
    # and the optional --baseline-backup flag differ.
    subcommand = 'incremental' if is_incremental else 'full'
    retry_cmd = f" starrocks-br backup {subcommand} --config {config} --group {group} --name {snapshot_name}_retry"
    if is_incremental and baseline_backup:
        retry_cmd += f" --baseline-backup {baseline_backup}"
    logger.info(retry_cmd)

    logger.info("")
    logger.tip("You can verify the existing backup by checking the repository or running:")
    logger.tip(f" SHOW SNAPSHOT ON {repository} WHERE Snapshot = '{snapshot_name}'")
50
+
51
+
18
52
  @click.group()
19
53
  def cli():
20
54
  """StarRocks Backup & Restore automation tool."""
@@ -43,7 +77,8 @@ def init(config):
43
77
  port=cfg['port'],
44
78
  user=cfg['user'],
45
79
  password=os.getenv('STARROCKS_PASSWORD'),
46
- database=cfg['database']
80
+ database=cfg['database'],
81
+ tls_config=cfg.get('tls'),
47
82
  )
48
83
 
49
84
  with database:
@@ -102,7 +137,8 @@ def backup_incremental(config, baseline_backup, group, name):
102
137
  port=cfg['port'],
103
138
  user=cfg['user'],
104
139
  password=os.getenv('STARROCKS_PASSWORD'),
105
- database=cfg['database']
140
+ database=cfg['database'],
141
+ tls_config=cfg.get('tls'),
106
142
  )
107
143
 
108
144
  with database:
@@ -174,6 +210,13 @@ def backup_incremental(config, baseline_backup, group, name):
174
210
  logger.success(f"Backup completed successfully: {result['final_status']['state']}")
175
211
  sys.exit(0)
176
212
  else:
213
+ error_details = result.get('error_details')
214
+ if error_details and error_details.get('error_type') == 'snapshot_exists':
215
+ _handle_snapshot_exists_error(
216
+ error_details, label, config, cfg['repository'], 'incremental', group, baseline_backup
217
+ )
218
+ sys.exit(1)
219
+
177
220
  state = result.get('final_status', {}).get('state', 'UNKNOWN')
178
221
  if state == "LOST":
179
222
  logger.critical("Backup tracking lost!")
@@ -215,7 +258,8 @@ def backup_full(config, group, name):
215
258
  port=cfg['port'],
216
259
  user=cfg['user'],
217
260
  password=os.getenv('STARROCKS_PASSWORD'),
218
- database=cfg['database']
261
+ database=cfg['database'],
262
+ tls_config=cfg.get('tls'),
219
263
  )
220
264
 
221
265
  with database:
@@ -274,6 +318,13 @@ def backup_full(config, group, name):
274
318
  logger.success(f"Backup completed successfully: {result['final_status']['state']}")
275
319
  sys.exit(0)
276
320
  else:
321
+ error_details = result.get('error_details')
322
+ if error_details and error_details.get('error_type') == 'snapshot_exists':
323
+ _handle_snapshot_exists_error(
324
+ error_details, label, config, cfg['repository'], 'full', group
325
+ )
326
+ sys.exit(1)
327
+
277
328
  state = result.get('final_status', {}).get('state', 'UNKNOWN')
278
329
  if state == "LOST":
279
330
  logger.critical("Backup tracking lost!")
@@ -300,8 +351,10 @@ def backup_full(config, group, name):
300
351
  @click.option('--config', required=True, help='Path to config YAML file')
301
352
  @click.option('--target-label', required=True, help='Backup label to restore to')
302
353
  @click.option('--group', help='Optional inventory group to filter tables to restore')
354
+ @click.option('--table', help='Optional table name to restore (table name only, database comes from config). Cannot be used with --group.')
303
355
  @click.option('--rename-suffix', default='_restored', help='Suffix for temporary tables during restore (default: _restored)')
304
- def restore_command(config, target_label, group, rename_suffix):
356
+ @click.option('--yes', is_flag=True, help='Skip confirmation prompt and proceed automatically')
357
+ def restore_command(config, target_label, group, table, rename_suffix, yes):
305
358
  """Restore data to a specific point in time using intelligent backup chain resolution.
306
359
 
307
360
  This command automatically determines the correct sequence of backups needed for restore:
@@ -311,9 +364,23 @@ def restore_command(config, target_label, group, rename_suffix):
311
364
  The restore process uses temporary tables with the specified suffix for safety, then performs
312
365
  an atomic rename to make the restored data live.
313
366
 
314
- Flow: load config → find restore pair → get tables from backup → execute restore flow
367
+ Flow: load config → check health → ensure repository → find restore pair → get tables from backup → execute restore flow
315
368
  """
316
369
  try:
370
+ if group and table:
371
+ logger.error("Cannot specify both --group and --table. Use --table for single table restore or --group for inventory group restore.")
372
+ sys.exit(1)
373
+
374
+ if table:
375
+ table = table.strip()
376
+ if not table:
377
+ logger.error("Table name cannot be empty")
378
+ sys.exit(1)
379
+
380
+ if '.' in table:
381
+ logger.error("Table name must not include database prefix. Use 'table_name' not 'database.table_name'. Database comes from config file.")
382
+ sys.exit(1)
383
+
317
384
  cfg = config_module.load_config(config)
318
385
  config_module.validate_config(cfg)
319
386
 
@@ -322,7 +389,8 @@ def restore_command(config, target_label, group, rename_suffix):
322
389
  port=cfg['port'],
323
390
  user=cfg['user'],
324
391
  password=os.getenv('STARROCKS_PASSWORD'),
325
- database=cfg['database']
392
+ database=cfg['database'],
393
+ tls_config=cfg.get('tls'),
326
394
  )
327
395
 
328
396
  with database:
@@ -332,6 +400,17 @@ def restore_command(config, target_label, group, rename_suffix):
332
400
  logger.warning("Remember to populate ops.table_inventory with your backup groups!")
333
401
  sys.exit(1) # Exit if schema was just created, requires user action
334
402
 
403
+ healthy, message = health.check_cluster_health(database)
404
+ if not healthy:
405
+ logger.error(f"Cluster health check failed: {message}")
406
+ sys.exit(1)
407
+
408
+ logger.success(f"Cluster health: {message}")
409
+
410
+ repository.ensure_repository(database, cfg['repository'])
411
+
412
+ logger.success(f"Repository '{cfg['repository']}' verified")
413
+
335
414
  logger.info(f"Finding restore sequence for target backup: {target_label}")
336
415
 
337
416
  try:
@@ -342,11 +421,24 @@ def restore_command(config, target_label, group, rename_suffix):
342
421
  sys.exit(1)
343
422
 
344
423
  logger.info("Determining tables to restore from backup manifest...")
345
- tables_to_restore = restore.get_tables_from_backup(database, target_label, group)
424
+
425
+ try:
426
+ tables_to_restore = restore.get_tables_from_backup(
427
+ database,
428
+ target_label,
429
+ group=group,
430
+ table=table,
431
+ database=cfg['database'] if table else None
432
+ )
433
+ except ValueError as e:
434
+ logger.error(str(e))
435
+ sys.exit(1)
346
436
 
347
437
  if not tables_to_restore:
348
438
  if group:
349
439
  logger.warning(f"No tables found in backup '{target_label}' for group '{group}'")
440
+ elif table:
441
+ logger.warning(f"No tables found in backup '{target_label}' for table '{table}'")
350
442
  else:
351
443
  logger.warning(f"No tables found in backup '{target_label}'")
352
444
  sys.exit(1)
@@ -359,7 +451,8 @@ def restore_command(config, target_label, group, rename_suffix):
359
451
  cfg['repository'],
360
452
  restore_pair,
361
453
  tables_to_restore,
362
- rename_suffix
454
+ rename_suffix,
455
+ skip_confirmation=yes
363
456
  )
364
457
 
365
458
  if result['success']:
@@ -375,6 +468,9 @@ def restore_command(config, target_label, group, rename_suffix):
375
468
  except ValueError as e:
376
469
  logger.error(f"Configuration error: {e}")
377
470
  sys.exit(1)
471
+ except RuntimeError as e:
472
+ logger.error(f"{e}")
473
+ sys.exit(1)
378
474
  except Exception as e:
379
475
  logger.error(f"Unexpected error: {e}")
380
476
  sys.exit(1)
starrocks_br/config.py CHANGED
@@ -1,8 +1,8 @@
1
1
  import yaml
2
- from typing import Dict
2
+ from typing import Any, Dict, Optional
3
3
 
4
4
 
5
- def load_config(config_path: str) -> Dict:
5
+ def load_config(config_path: str) -> Dict[str, Any]:
6
6
  """Load and parse YAML configuration file.
7
7
 
8
8
  Args:
@@ -24,7 +24,7 @@ def load_config(config_path: str) -> Dict:
24
24
  return config
25
25
 
26
26
 
27
- def validate_config(config: Dict) -> None:
27
+ def validate_config(config: Dict[str, Any]) -> None:
28
28
  """Validate that config contains required fields.
29
29
 
30
30
  Args:
@@ -39,3 +39,26 @@ def validate_config(config: Dict) -> None:
39
39
  if field not in config:
40
40
  raise ValueError(f"Missing required config field: {field}")
41
41
 
42
+ _validate_tls_section(config.get('tls'))
43
+
44
+
45
+ def _validate_tls_section(tls_config) -> None:
46
+ if tls_config is None:
47
+ return
48
+
49
+ if not isinstance(tls_config, dict):
50
+ raise ValueError("TLS configuration must be a dictionary")
51
+
52
+ enabled = bool(tls_config.get('enabled', False))
53
+
54
+ if enabled and not tls_config.get('ca_cert'):
55
+ raise ValueError("TLS configuration requires 'ca_cert' when 'enabled' is true")
56
+
57
+ if 'verify_server_cert' in tls_config and not isinstance(tls_config['verify_server_cert'], bool):
58
+ raise ValueError("TLS configuration field 'verify_server_cert' must be a boolean if provided")
59
+
60
+ if 'tls_versions' in tls_config:
61
+ tls_versions = tls_config['tls_versions']
62
+ if not isinstance(tls_versions, list) or not all(isinstance(version, str) for version in tls_versions):
63
+ raise ValueError("TLS configuration field 'tls_versions' must be a list of strings if provided")
64
+
starrocks_br/db.py CHANGED
@@ -1,11 +1,19 @@
1
1
  import mysql.connector
2
- from typing import List
2
+ from typing import Any, Dict, List, Optional
3
3
 
4
4
 
5
5
  class StarRocksDB:
6
6
  """Database connection wrapper for StarRocks."""
7
7
 
8
- def __init__(self, host: str, port: int, user: str, password: str, database: str):
8
+ def __init__(
9
+ self,
10
+ host: str,
11
+ port: int,
12
+ user: str,
13
+ password: str,
14
+ database: str,
15
+ tls_config: Optional[Dict[str, Any]] = None,
16
+ ):
9
17
  """Initialize database connection.
10
18
 
11
19
  Args:
@@ -21,16 +29,44 @@ class StarRocksDB:
21
29
  self.password = password
22
30
  self.database = database
23
31
  self._connection = None
32
+ self.tls_config = tls_config or {}
33
+ self._timezone: Optional[str] = None
24
34
 
25
35
  def connect(self) -> None:
26
36
  """Establish database connection."""
27
- self._connection = mysql.connector.connect(
28
- host=self.host,
29
- port=self.port,
30
- user=self.user,
31
- password=self.password,
32
- database=self.database
33
- )
37
+ conn_args: Dict[str, Any] = {
38
+ 'host': self.host,
39
+ 'port': self.port,
40
+ 'user': self.user,
41
+ 'password': self.password,
42
+ 'database': self.database,
43
+ }
44
+
45
+ if self.tls_config.get('enabled'):
46
+ ssl_args: Dict[str, Any] = {
47
+ 'ssl_ca': self.tls_config.get('ca_cert'),
48
+ 'ssl_cert': self.tls_config.get('client_cert'),
49
+ 'ssl_key': self.tls_config.get('client_key'),
50
+ 'ssl_verify_cert': self.tls_config.get('verify_server_cert', True),
51
+ }
52
+
53
+ tls_versions = self.tls_config.get('tls_versions', ['TLSv1.2', 'TLSv1.3'])
54
+ if tls_versions:
55
+ ssl_args['tls_versions'] = tls_versions
56
+
57
+ conn_args.update({key: value for key, value in ssl_args.items() if value is not None})
58
+
59
+ try:
60
+ self._connection = mysql.connector.connect(**conn_args)
61
+ except mysql.connector.Error as e:
62
+ if self.tls_config.get('enabled') and "SSL is required" in str(e):
63
+ raise mysql.connector.Error(
64
+ f"TLS is enabled in configuration but StarRocks server doesn't support it. "
65
+ f"Error: {e}. "
66
+ f"To fix this, you need to enable TLS/SSL in your StarRocks server configuration. "
67
+ f"Alternatively, set 'enabled: false' in the tls section of your config file."
68
+ ) from e
69
+ raise
34
70
 
35
71
  def close(self) -> None:
36
72
  """Close database connection."""
@@ -85,4 +121,34 @@ class StarRocksDB:
85
121
  def __exit__(self, exc_type, exc_val, exc_tb):
86
122
  """Context manager exit."""
87
123
  self.close()
124
+
125
+ @property
126
+ def timezone(self) -> str:
127
+ """Get the StarRocks cluster timezone.
128
+
129
+ Queries the cluster timezone on first access and caches it for subsequent use.
130
+ If the query fails (e.g., database unavailable, connection error, permissions),
131
+ defaults to 'UTC' to ensure the property always returns a valid timezone string.
132
+
133
+ Returns:
134
+ Timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00')
135
+ Defaults to 'UTC' if query fails or returns no results.
136
+ """
137
+ if self._timezone is None:
138
+ try:
139
+ query = "SHOW VARIABLES LIKE 'time_zone'"
140
+ rows = self.query(query)
141
+
142
+ if not rows:
143
+ self._timezone = "UTC"
144
+ else:
145
+ row = rows[0]
146
+ if isinstance(row, dict):
147
+ self._timezone = row.get("Value", "UTC")
148
+ else:
149
+ self._timezone = row[1] if len(row) > 1 else "UTC"
150
+ except Exception:
151
+ self._timezone = "UTC"
152
+
153
+ return self._timezone
88
154
 
starrocks_br/executor.py CHANGED
@@ -1,23 +1,68 @@
1
1
  import time
2
2
  import datetime
3
- from typing import Dict, Literal, Optional
4
- from . import history, concurrency, logger
3
+ import re
4
+ from typing import Dict, Literal, Optional, Tuple
5
+ from . import history, concurrency, logger, timezone
5
6
 
6
- MAX_POLLS = 21600 # 6 hours
7
+ MAX_POLLS = 86400 # 1 day
7
8
 
8
- def submit_backup_command(db, backup_command: str) -> tuple[bool, Optional[str]]:
9
+ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str], Optional[Dict[str, str]]]:
9
10
  """Submit a backup command to StarRocks.
10
11
 
11
- Returns (success, error_message).
12
+ Returns (success, error_message, error_details).
13
+ error_details is a dict with keys like 'error_type' and 'snapshot_name' for specific error cases.
12
14
  """
13
15
  try:
14
16
  db.execute(backup_command.strip())
15
- return True, None
17
+ return True, None, None
16
18
  except Exception as e:
17
- error_msg = f"Failed to submit backup command: {type(e).__name__}: {str(e)}"
19
+ error_str = str(e)
20
+ error_type = type(e).__name__
21
+
22
+ snapshot_exists_match = _check_snapshot_exists_error(e, error_str)
23
+ if snapshot_exists_match:
24
+ snapshot_name = snapshot_exists_match
25
+ error_details = {
26
+ 'error_type': 'snapshot_exists',
27
+ 'snapshot_name': snapshot_name
28
+ }
29
+ error_msg = f"Snapshot '{snapshot_name}' already exists in repository"
30
+ logger.error(error_msg)
31
+ logger.error(f"backup_command: {backup_command}")
32
+ return False, error_msg, error_details
33
+
34
+ error_msg = f"Failed to submit backup command: {error_type}: {error_str}"
18
35
  logger.error(error_msg)
19
36
  logger.error(f"backup_command: {backup_command}")
20
- return False, error_msg
37
+ return False, error_msg, None
38
+
39
+
40
+ def _check_snapshot_exists_error(exception: Exception, error_str: str) -> Optional[str]:
41
+ """Check if the error is a 'snapshot already exists' error and extract snapshot name.
42
+
43
+ Args:
44
+ exception: The exception that was raised
45
+ error_str: String representation of the error
46
+
47
+ Returns:
48
+ Snapshot name if this is a snapshot exists error, None otherwise
49
+ """
50
+ snapshot_name_pattern = r"Snapshot with name '([^']+)' already exist"
51
+ error_lower = error_str.lower()
52
+
53
+ is_snapshot_exists_error = (
54
+ "already exist" in error_lower or
55
+ "already exists" in error_lower or
56
+ ("5064" in error_str and "already exist" in error_lower) or
57
+ (hasattr(exception, 'errno') and exception.errno == 5064)
58
+ )
59
+
60
+ if is_snapshot_exists_error:
61
+ match = re.search(snapshot_name_pattern, error_str, re.IGNORECASE)
62
+ if match:
63
+ return match.group(1)
64
+
65
+ return None
21
66
 
22
67
 
23
68
  def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS, poll_interval: float = 1.0) -> Dict[str, str]:
@@ -117,15 +162,19 @@ def execute_backup(
117
162
  if not database:
118
163
  database = _extract_database_from_command(backup_command)
119
164
 
120
- started_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
165
+ cluster_tz = db.timezone
166
+ started_at = timezone.get_current_time_in_cluster_tz(cluster_tz)
121
167
 
122
- success, submit_error = submit_backup_command(db, backup_command)
168
+ success, submit_error, error_details = submit_backup_command(db, backup_command)
123
169
  if not success:
124
- return {
170
+ result = {
125
171
  "success": False,
126
172
  "final_status": None,
127
173
  "error_message": submit_error or "Failed to submit backup command (unknown error)"
128
174
  }
175
+ if error_details:
176
+ result["error_details"] = error_details
177
+ return result
129
178
 
130
179
  try:
131
180
  final_status = poll_backup_status(db, label, database, max_polls, poll_interval)
@@ -133,6 +182,7 @@ def execute_backup(
133
182
  success = final_status["state"] == "FINISHED"
134
183
 
135
184
  try:
185
+ finished_at = timezone.get_current_time_in_cluster_tz(cluster_tz)
136
186
  history.log_backup(
137
187
  db,
138
188
  {
@@ -141,7 +191,7 @@ def execute_backup(
141
191
  "status": final_status["state"],
142
192
  "repository": repository,
143
193
  "started_at": started_at,
144
- "finished_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
194
+ "finished_at": finished_at,
145
195
  "error_message": None if success else (final_status["state"] or ""),
146
196
  },
147
197
  )
starrocks_br/planner.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import List, Dict, Optional
2
+ import datetime
2
3
 
3
- from starrocks_br import logger
4
+ from starrocks_br import logger, timezone
4
5
 
5
6
 
6
7
  def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
@@ -11,7 +12,8 @@ def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
11
12
  database: Database name to search for
12
13
 
13
14
  Returns:
14
- Dictionary with keys: label, backup_type, finished_at, or None if no full backup found
15
+ Dictionary with keys: label, backup_type, finished_at, or None if no full backup found.
16
+ The finished_at value is returned as a string in the cluster timezone format.
15
17
  """
16
18
  query = f"""
17
19
  SELECT label, backup_type, finished_at
@@ -29,10 +31,18 @@ def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
29
31
  return None
30
32
 
31
33
  row = rows[0]
34
+ finished_at = row[2]
35
+
36
+ if isinstance(finished_at, datetime.datetime):
37
+ cluster_tz = db.timezone
38
+ finished_at = finished_at.strftime("%Y-%m-%d %H:%M:%S")
39
+ elif not isinstance(finished_at, str):
40
+ finished_at = str(finished_at)
41
+
32
42
  return {
33
43
  "label": row[0],
34
44
  "backup_type": row[1],
35
- "finished_at": row[2]
45
+ "finished_at": finished_at
36
46
  }
37
47
 
38
48
 
@@ -66,6 +76,8 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
66
76
  Returns list of dictionaries with keys: database, table, partition_name.
67
77
  Only partitions of tables within the specified database are returned.
68
78
  """
79
+ cluster_tz = db.timezone
80
+
69
81
  if baseline_backup_label:
70
82
  baseline_query = f"""
71
83
  SELECT finished_at
@@ -76,17 +88,21 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
76
88
  baseline_rows = db.query(baseline_query)
77
89
  if not baseline_rows:
78
90
  raise ValueError(f"Baseline backup '{baseline_backup_label}' not found or not successful")
79
- baseline_time = baseline_rows[0][0]
91
+ baseline_time_raw = baseline_rows[0][0]
80
92
  else:
81
93
  latest_backup = find_latest_full_backup(db, database)
82
94
  if not latest_backup:
83
95
  raise ValueError(f"No successful full backup found for database '{database}'. Run a full database backup first.")
84
- baseline_time = latest_backup['finished_at']
96
+ baseline_time_raw = latest_backup['finished_at']
85
97
 
86
- if isinstance(baseline_time, str):
87
- threshold_str = baseline_time
98
+ if isinstance(baseline_time_raw, datetime.datetime):
99
+ baseline_time_str = baseline_time_raw.strftime("%Y-%m-%d %H:%M:%S")
100
+ elif isinstance(baseline_time_raw, str):
101
+ baseline_time_str = baseline_time_raw
88
102
  else:
89
- threshold_str = baseline_time.strftime("%Y-%m-%d %H:%M:%S")
103
+ baseline_time_str = str(baseline_time_raw)
104
+
105
+ baseline_dt = timezone.parse_datetime_with_tz(baseline_time_str, cluster_tz)
90
106
 
91
107
  group_tables = find_tables_by_group(db, group_name)
92
108
 
@@ -129,12 +145,16 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
129
145
  partition_name = row[1]
130
146
  visible_version_time = row[3]
131
147
 
132
- if isinstance(visible_version_time, str):
133
- version_time_str = visible_version_time
148
+ if isinstance(visible_version_time, datetime.datetime):
149
+ visible_version_time_str = visible_version_time.strftime("%Y-%m-%d %H:%M:%S")
150
+ elif isinstance(visible_version_time, str):
151
+ visible_version_time_str = visible_version_time
134
152
  else:
135
- version_time_str = visible_version_time.strftime("%Y-%m-%d %H:%M:%S")
153
+ visible_version_time_str = str(visible_version_time)
154
+
155
+ visible_version_dt = timezone.parse_datetime_with_tz(visible_version_time_str, cluster_tz)
136
156
 
137
- if version_time_str > threshold_str:
157
+ if visible_version_dt > baseline_dt:
138
158
  recent_partitions.append({
139
159
  'database': db_name,
140
160
  'table': table_name,
starrocks_br/restore.py CHANGED
@@ -3,7 +3,7 @@ import datetime
3
3
  from typing import Dict, List, Optional
4
4
  from . import history, concurrency, logger
5
5
 
6
- MAX_POLLS = 21600 # 6 hours
6
+ MAX_POLLS = 86400 # 1 day
7
7
 
8
8
  def get_snapshot_timestamp(db, repo_name: str, snapshot_name: str) -> str:
9
9
  """Get the backup timestamp for a specific snapshot from the repository.
@@ -281,17 +281,30 @@ def find_restore_pair(db, target_label: str) -> List[str]:
281
281
  raise ValueError(f"Unknown backup type '{target_info['backup_type']}' for label '{target_label}'")
282
282
 
283
283
 
284
- def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[str]:
284
+ def get_tables_from_backup(db, label: str, group: Optional[str] = None, table: Optional[str] = None, database: Optional[str] = None) -> List[str]:
285
285
  """Get list of tables to restore from backup manifest.
286
286
 
287
287
  Args:
288
288
  db: Database connection
289
289
  label: Backup label
290
290
  group: Optional inventory group to filter tables
291
+ table: Optional table name to filter (single table, database comes from database parameter)
292
+ database: Database name (required if table is specified)
291
293
 
292
294
  Returns:
293
- List of table names to restore
295
+ List of table names to restore (format: database.table)
296
+
297
+ Raises:
298
+ ValueError: If both group and table are specified
299
+ ValueError: If table is specified but database is not provided
300
+ ValueError: If table is specified but not found in backup
294
301
  """
302
+ if group and table:
303
+ raise ValueError("Cannot specify both --group and --table. Use --table for single table restore or --group for inventory group restore.")
304
+
305
+ if table and not database:
306
+ raise ValueError("database parameter is required when table is specified")
307
+
295
308
  query = f"""
296
309
  SELECT DISTINCT database_name, table_name
297
310
  FROM ops.backup_partitions
@@ -305,6 +318,15 @@ def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[
305
318
 
306
319
  tables = [f"{row[0]}.{row[1]}" for row in rows]
307
320
 
321
+ if table:
322
+ target_table = f"{database}.{table}"
323
+ filtered_tables = [t for t in tables if t == target_table]
324
+
325
+ if not filtered_tables:
326
+ raise ValueError(f"Table '{table}' not found in backup '{label}' for database '{database}'")
327
+
328
+ return filtered_tables
329
+
308
330
  if group:
309
331
  group_query = f"""
310
332
  SELECT database_name, table_name
@@ -316,14 +338,26 @@ def get_tables_from_backup(db, label: str, group: Optional[str] = None) -> List[
316
338
  if not group_rows:
317
339
  return []
318
340
 
319
- group_tables = {f"{row[0]}.{row[1]}" for row in group_rows}
341
+ group_tables = set()
342
+ for row in group_rows:
343
+ database_name, table_name = row[0], row[1]
344
+ if table_name == '*':
345
+ show_tables_query = f"SHOW TABLES FROM {database_name}"
346
+ try:
347
+ tables_rows = db.query(show_tables_query)
348
+ for table_row in tables_rows:
349
+ group_tables.add(f"{database_name}.{table_row[0]}")
350
+ except Exception:
351
+ continue
352
+ else:
353
+ group_tables.add(f"{database_name}.{table_name}")
320
354
 
321
355
  tables = [table for table in tables if table in group_tables]
322
356
 
323
357
  return tables
324
358
 
325
359
 
326
- def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_restore: List[str], rename_suffix: str = "_restored") -> Dict:
360
+ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_restore: List[str], rename_suffix: str = "_restored", skip_confirmation: bool = False) -> Dict:
327
361
  """Execute the complete restore flow with safety measures.
328
362
 
329
363
  Args:
@@ -332,6 +366,7 @@ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_
332
366
  restore_pair: List of backup labels in restore order
333
367
  tables_to_restore: List of tables to restore (format: database.table)
334
368
  rename_suffix: Suffix for temporary tables
369
+ skip_confirmation: If True, skip interactive confirmation prompt
335
370
 
336
371
  Returns:
337
372
  Dictionary with success status and details
@@ -358,12 +393,15 @@ def execute_restore_flow(db, repo_name: str, restore_pair: List[str], tables_to_
358
393
  logger.info("This will restore data to temporary tables and then perform atomic rename.")
359
394
  logger.warning("WARNING: This operation will replace existing tables!")
360
395
 
361
- confirmation = input("\nDo you want to proceed? [Y/n]: ").strip()
362
- if confirmation.lower() != 'y':
363
- return {
364
- "success": False,
365
- "error_message": "Restore operation cancelled by user"
366
- }
396
+ if not skip_confirmation:
397
+ confirmation = input("\nDo you want to proceed? [Y/n]: ").strip()
398
+ if confirmation.lower() != 'y':
399
+ return {
400
+ "success": False,
401
+ "error_message": "Restore operation cancelled by user"
402
+ }
403
+ else:
404
+ logger.info("Proceeding automatically (--yes flag provided)")
367
405
 
368
406
  try:
369
407
  database_name = tables_to_restore[0].split('.')[0]
@@ -470,6 +508,19 @@ def _build_restore_command_without_rename(backup_label: str, repo_name: str, tab
470
508
  PROPERTIES ("backup_timestamp" = "{backup_timestamp}")"""
471
509
 
472
510
 
511
+ def _generate_timestamped_backup_name(table_name: str) -> str:
512
+ """Generate a timestamped backup table name.
513
+
514
+ Args:
515
+ table_name: Original table name
516
+
517
+ Returns:
518
+ Timestamped backup name in format: {table_name}_backup_YYYYMMDD_HHMMSS
519
+ """
520
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
521
+ return f"{table_name}_backup_{timestamp}"
522
+
523
+
473
524
  def _perform_atomic_rename(db, tables: List[str], rename_suffix: str) -> Dict:
474
525
  """Perform atomic rename of temporary tables to make them live."""
475
526
  try:
@@ -477,8 +528,9 @@ def _perform_atomic_rename(db, tables: List[str], rename_suffix: str) -> Dict:
477
528
  for table in tables:
478
529
  database, table_name = table.split('.', 1)
479
530
  temp_table_name = f"{table_name}{rename_suffix}"
531
+ backup_table_name = _generate_timestamped_backup_name(table_name)
480
532
 
481
- rename_statements.append(f"ALTER TABLE {database}.{table_name} RENAME {table_name}_backup")
533
+ rename_statements.append(f"ALTER TABLE {database}.{table_name} RENAME {backup_table_name}")
482
534
  rename_statements.append(f"ALTER TABLE {database}.{temp_table_name} RENAME {table_name}")
483
535
 
484
536
  for statement in rename_statements:
starrocks_br/schema.py CHANGED
@@ -120,7 +120,7 @@ def get_run_status_schema() -> str:
120
120
  CREATE TABLE IF NOT EXISTS ops.run_status (
121
121
  scope STRING NOT NULL COMMENT "Job scope: backup or restore",
122
122
  label STRING NOT NULL COMMENT "Job label or identifier",
123
- state STRING NOT NULL DEFAULT "ACTIVE" COMMENT "Job state: ACTIVE or COMPLETED",
123
+ state STRING NOT NULL DEFAULT "ACTIVE" COMMENT "Job state: ACTIVE, FINISHED, FAILED, or CANCELLED",
124
124
  started_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT "Job start timestamp",
125
125
  finished_at DATETIME COMMENT "Job completion timestamp"
126
126
  )
@@ -0,0 +1,125 @@
1
+ import datetime
2
+ from typing import Union
3
+ from zoneinfo import ZoneInfo
4
+
5
+
6
def get_current_time_in_cluster_tz(cluster_tz: str) -> str:
    """Return the current time as wall-clock text in the cluster's timezone.

    Args:
        cluster_tz: Timezone string resolved through ``_get_timezone``
            (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        The current time formatted as 'YYYY-MM-DD HH:MM:SS'; the string
        carries no UTC offset.
    """
    return datetime.datetime.now(_get_timezone(cluster_tz)).strftime("%Y-%m-%d %H:%M:%S")
18
+
19
+
20
def parse_datetime_with_tz(dt_str: str, tz: str) -> datetime.datetime:
    """Read a 'YYYY-MM-DD HH:MM:SS' string as wall-clock time in ``tz``.

    The timezone is attached to the parsed value; the clock fields
    themselves are not shifted.

    Args:
        dt_str: Datetime string in 'YYYY-MM-DD HH:MM:SS' format.
        tz: Timezone string resolved through ``_get_timezone``
            (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        A timezone-aware ``datetime`` whose ``tzinfo`` is the resolved zone.

    Raises:
        ValueError: If ``dt_str`` does not match the expected format
            (raised by ``strptime``).
    """
    # Resolve the zone before parsing, matching the original call order.
    zone = _get_timezone(tz)
    naive = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
    return naive.replace(tzinfo=zone)
36
+
37
+
38
def normalize_datetime_to_tz(dt: datetime.datetime, target_tz: str) -> datetime.datetime:
    """Express ``dt`` in the target timezone.

    Args:
        dt: Datetime to convert. A naive value (``tzinfo is None``) is
            treated as UTC before conversion.
        target_tz: Timezone string resolved through ``_get_timezone``
            (e.g., 'Asia/Shanghai', 'UTC', '+08:00').

    Returns:
        A timezone-aware ``datetime`` for the same instant, in the target
        timezone.
    """
    target = _get_timezone(target_tz)
    # Naive datetimes carry no zone of their own; tag them as UTC so that
    # astimezone() converts from a known reference.
    aware = dt if dt.tzinfo is not None else dt.replace(tzinfo=datetime.timezone.utc)
    return aware.astimezone(target)
56
+
57
+
58
def _get_timezone(tz_str: str) -> Union[ZoneInfo, datetime.timezone]:
    """Turn a timezone string into a tzinfo object, defaulting to UTC.

    Accepts IANA-style names (e.g., 'Asia/Shanghai') as well as fixed
    offsets written as '+HH:MM' / '-HH:MM'. Surrounding whitespace is
    ignored. Anything that cannot be resolved yields UTC rather than an
    error.

    Args:
        tz_str: Timezone string.

    Returns:
        ``ZoneInfo`` for named zones and for every UTC/fallback case, or a
        fixed-offset ``datetime.timezone`` for non-zero offset strings.
    """
    name = tz_str.strip()

    # The UTC spellings are answered directly, without offset parsing.
    if name.upper() == "UTC" or name in ("+00:00", "-00:00"):
        return ZoneInfo("UTC")

    if name[:1] in ("+", "-"):
        try:
            hours, minutes = _parse_offset(name)
            return datetime.timezone(datetime.timedelta(hours=hours, minutes=minutes))
        except ValueError:
            # Malformed or out-of-range offset: fall back to UTC.
            return ZoneInfo("UTC")

    try:
        zone = ZoneInfo(name)
    except Exception:
        # Best-effort lookup: any failure to load the named zone means UTC.
        zone = ZoneInfo("UTC")
    return zone
86
+
87
+
88
def _parse_offset(offset_str: str) -> tuple[int, int]:
    """Parse a '+HH:MM' / '-HH:MM' offset into signed hours and minutes.

    Both returned components carry the sign of the offset, so they can be
    passed together to ``datetime.timedelta(hours=..., minutes=...)``;
    for example '-05:30' yields ``(-5, -30)``.

    Args:
        offset_str: Offset string in exactly the format '+HH:MM' or '-HH:MM'.

    Returns:
        Tuple of (hours, minutes), each multiplied by the offset's sign.

    Raises:
        ValueError: If the offset string is invalid, including:
            - Length other than 6 characters (e.g. trailing text)
            - First character is neither '+' nor '-'
            - Missing colon at position 3
            - Hour or minute field is not two ASCII digits
            - Hours >= 24
            - Minutes >= 60
    """
    # Require the exact sign + HH + ':' + MM shape so that trailing text is
    # rejected instead of being silently ignored.
    if len(offset_str) != 6:
        raise ValueError(f"Invalid offset format: {offset_str}")

    sign_char = offset_str[0]
    hour_text = offset_str[1:3]
    minute_text = offset_str[4:6]

    if sign_char not in ("+", "-"):
        raise ValueError(f"Invalid offset format: {offset_str} (missing sign)")

    if offset_str[3] != ':':
        raise ValueError(f"Invalid offset format: {offset_str} (missing colon)")

    # int() alone would also accept padded or signed text such as ' 8' or
    # '+8'; only plain ASCII digits are valid in an offset.
    fields_are_digits = all(
        text.isascii() and text.isdigit() for text in (hour_text, minute_text)
    )
    if not fields_are_digits:
        raise ValueError(f"Invalid offset format: {offset_str} (non-numeric values)")

    hours = int(hour_text)
    minutes = int(minute_text)

    if hours >= 24:
        raise ValueError(f"Invalid offset format: {offset_str} (hours must be 00-23)")

    if minutes >= 60:
        raise ValueError(f"Invalid offset format: {offset_str} (minutes must be 00-59)")

    sign = 1 if sign_char == "+" else -1
    return sign * hours, sign * minutes
125
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: starrocks-br
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: StarRocks Backup and Restore automation tool
5
5
  Requires-Python: >=3.9
6
6
  Requires-Dist: click<9,>=8.1.7
@@ -0,0 +1,20 @@
1
+ starrocks_br/__init__.py,sha256=i1m0FIl2IAXaVyNoya0ZNAx3WfhIp9I6VLhTz06qNFY,28
2
+ starrocks_br/cli.py,sha256=3FvtMDYra6_9CwxSWj3i4VcIjHlSDO9LbBwC0-Bh8bc,20550
3
+ starrocks_br/concurrency.py,sha256=wx69u-RW1OnukKn6CQ9EJS4L42N9Gzw7Xz7rgETzmy4,5934
4
+ starrocks_br/config.py,sha256=nL57JR4O1WBG6iOscuCX5aui7lOs6PIiS97MIgVDvOM,2036
5
+ starrocks_br/db.py,sha256=UDmN8uIYTMzxx7_D0XxmYSpH3VTwBKEeBNQ-4hI4SnA,5098
6
+ starrocks_br/executor.py,sha256=I9cefPbHaSiajPvoYBVaRXM06UfP6jzVRdS8q_t5zeE,10681
7
+ starrocks_br/health.py,sha256=DpTy4uqk1UrbV0d9Wtk9Ke9K0iT4ndL-01gqqSywR_c,1050
8
+ starrocks_br/history.py,sha256=j6eqkD1MyTvgoztffnLnr6-6VXd0gdvLxLLKxbC1AG0,3016
9
+ starrocks_br/labels.py,sha256=D67JqIUWtFAnuj9thnC4Y7A0tzLk6d4YpBtDGhen1yc,1689
10
+ starrocks_br/logger.py,sha256=QTfr-nC3TdeU7f1gcRTRDAQSLYpwaevd_iT1B_RbuF8,900
11
+ starrocks_br/planner.py,sha256=_e65v5XRoXKJcLPX73UXKtA721TWjw2Txfet-TItXu8,10326
12
+ starrocks_br/repository.py,sha256=6uTJBYgQFEjJBlfhirriTkad80teTPcBSl_tphUExz4,1269
13
+ starrocks_br/restore.py,sha256=_OzlQjPFkH7ySgFSSbpYZHY3A_U1Jz3-UKTwGl2AiQw,18965
14
+ starrocks_br/schema.py,sha256=s_BAUhNgfQscRzhj-OB4xmp19CYZ0ZElyBfdht0_OSE,6114
15
+ starrocks_br/timezone.py,sha256=ONaudOgIfzPdJpZyIUSh0HF5D7oPQAtungNoUqtGM6M,3738
16
+ starrocks_br-0.2.0.dist-info/METADATA,sha256=RnBcIZXdCArx1rXruABPf735XJjb3ZB1jy-egUrKBy0,419
17
+ starrocks_br-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ starrocks_br-0.2.0.dist-info/entry_points.txt,sha256=AKUt01G2MAlh85s1Q9kNQDOUio14kaTnT3dmg9gjdNg,54
19
+ starrocks_br-0.2.0.dist-info/top_level.txt,sha256=CU1tGVo0kjulhDr761Sndg-oTeRKsisDnWm8UG95aBE,13
20
+ starrocks_br-0.2.0.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- starrocks_br/__init__.py,sha256=i1m0FIl2IAXaVyNoya0ZNAx3WfhIp9I6VLhTz06qNFY,28
2
- starrocks_br/cli.py,sha256=VrLLyvyAop687KNJnXIJhxL7EgVPzutQTAOJz4ANZMw,15969
3
- starrocks_br/concurrency.py,sha256=wx69u-RW1OnukKn6CQ9EJS4L42N9Gzw7Xz7rgETzmy4,5934
4
- starrocks_br/config.py,sha256=btmsf43IuxPIraFxJRScuHW-B3Bv7CPO7y43xn2b2cM,1047
5
- starrocks_br/db.py,sha256=whbkM5LVLNNauPDuSFXOXWuTOFWHBVsNai74O7bIUAg,2450
6
- starrocks_br/executor.py,sha256=_lbJQ6KjV7Jg5eYbSLZhvvYj-EM5kBV6lM3Lah9h6-U,8748
7
- starrocks_br/health.py,sha256=DpTy4uqk1UrbV0d9Wtk9Ke9K0iT4ndL-01gqqSywR_c,1050
8
- starrocks_br/history.py,sha256=j6eqkD1MyTvgoztffnLnr6-6VXd0gdvLxLLKxbC1AG0,3016
9
- starrocks_br/labels.py,sha256=D67JqIUWtFAnuj9thnC4Y7A0tzLk6d4YpBtDGhen1yc,1689
10
- starrocks_br/logger.py,sha256=QTfr-nC3TdeU7f1gcRTRDAQSLYpwaevd_iT1B_RbuF8,900
11
- starrocks_br/planner.py,sha256=wnFMbIQhY9dVaoRjmzB3DypPMEXQam94TFenwGhMGGk,9414
12
- starrocks_br/repository.py,sha256=6uTJBYgQFEjJBlfhirriTkad80teTPcBSl_tphUExz4,1269
13
- starrocks_br/restore.py,sha256=IPs8EzIS_7iTUMkKJV32YJvb4b0MTXpLgZmUWhnzPXo,16672
14
- starrocks_br/schema.py,sha256=_cFzD3Bnvb1WDlLzmFXuMSinLjiXKmLbJ9uUhyOCKK4,6095
15
- starrocks_br-0.1.0.dist-info/METADATA,sha256=u1ivetyaj0El_v_5Iy9WmJ4_KsEmhU8GUSy3T7Hb4xQ,419
16
- starrocks_br-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
- starrocks_br-0.1.0.dist-info/entry_points.txt,sha256=AKUt01G2MAlh85s1Q9kNQDOUio14kaTnT3dmg9gjdNg,54
18
- starrocks_br-0.1.0.dist-info/top_level.txt,sha256=CU1tGVo0kjulhDr761Sndg-oTeRKsisDnWm8UG95aBE,13
19
- starrocks_br-0.1.0.dist-info/RECORD,,