starrocks-br 0.5.2__py3-none-any.whl → 0.7.0a1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
starrocks_br/cli.py CHANGED
@@ -27,6 +27,7 @@ from . import (
     labels,
     logger,
     planner,
+    prune,
     repository,
     restore,
     schema,
@@ -98,13 +99,13 @@ def cli(ctx, verbose):
 @cli.command("init")
 @click.option("--config", required=True, help="Path to config YAML file")
 def init(config):
-    """Initialize ops database and control tables.
+    """Initialize operations database and control tables.
 
-    Creates the ops database with required tables:
-    - ops.table_inventory: Inventory groups mapping to databases/tables
-    - ops.backup_history: Backup operation history
-    - ops.restore_history: Restore operation history
-    - ops.run_status: Job concurrency control
+    Creates the operations database (default: 'ops') with required tables:
+    - table_inventory: Inventory groups mapping to databases/tables
+    - backup_history: Backup operation history
+    - restore_history: Restore operation history
+    - run_status: Job concurrency control
 
     Run this once before using backup/restore commands.
     """
@@ -112,6 +113,9 @@ def init(config):
         cfg = config_module.load_config(config)
         config_module.validate_config(cfg)
 
+        ops_database = config_module.get_ops_database(cfg)
+        table_inventory_entries = config_module.get_table_inventory_entries(cfg)
+
         database = db.StarRocksDB(
             host=cfg["host"],
             port=cfg["port"],
@@ -121,23 +125,43 @@ def init(config):
             tls_config=cfg.get("tls"),
         )
 
+        ops_database = config_module.get_ops_database(cfg)
+
         with database:
-            logger.info("Initializing ops schema...")
-            schema.initialize_ops_schema(database)
-            logger.info("")
-            logger.info("Next steps:")
-            logger.info("1. Insert your table inventory records:")
-            logger.info(" INSERT INTO ops.table_inventory")
-            logger.info(" (inventory_group, database_name, table_name)")
-            logger.info(" VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
-            logger.info(" VALUES ('my_full_database_backup', 'your_db', '*');")
-            logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
-            logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
+            logger.info("Validating repository...")
+            repository.ensure_repository(database, cfg["repository"])
             logger.info("")
-            logger.info("2. Run your first backup:")
-            logger.info(
-                " starrocks-br backup incremental --group my_daily_incremental --config config.yaml"
+
+            logger.info("Initializing ops schema...")
+            schema.initialize_ops_schema(
+                database, ops_database=ops_database, table_inventory_entries=table_inventory_entries
             )
+            logger.info("")
+
+            if table_inventory_entries:
+                logger.success(
+                    f"Table inventory bootstrapped from config with {len(table_inventory_entries)} entries"
+                )
+                logger.info("")
+                logger.info("Next steps:")
+                logger.info("1. Run your first backup:")
+                logger.info(
+                    f" starrocks-br backup incremental --group <your_group_name> --config {config}"
+                )
+            else:
+                logger.info("Next steps:")
+                logger.info("1. Insert your table inventory records:")
+                logger.info(f" INSERT INTO {ops_database}.table_inventory")
+                logger.info(" (inventory_group, database_name, table_name)")
+                logger.info(" VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
+                logger.info(" VALUES ('my_full_database_backup', 'your_db', '*');")
+                logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
+                logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
+                logger.info("")
+                logger.info("2. Run your first backup:")
+                logger.info(
+                    " starrocks-br backup incremental --group my_daily_incremental --config config.yaml"
+                )
 
     except exceptions.ConfigFileNotFoundError as e:
         error_handler.handle_config_file_not_found_error(e)
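
Note: `config_module.get_ops_database(cfg)` and `config_module.get_table_inventory_entries(cfg)` are new in 0.7.0a1, but their definitions are not part of this diff. A minimal sketch of what such accessors could look like, assuming the config exposes an optional operations-database key and an optional inventory list (the key names here are hypothetical; the real ones live in starrocks_br/config.py):

    # Hypothetical accessors; the config key names are assumptions, not taken from this diff.
    def get_ops_database(cfg: dict) -> str:
        # Every call site in the diff falls back to "ops" when unset.
        return cfg.get("ops_database", "ops")

    def get_table_inventory_entries(cfg: dict) -> list[dict]:
        # e.g. [{"inventory_group": "...", "database_name": "...", "table_name": "..."}]
        return cfg.get("table_inventory", [])
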
@@ -196,13 +220,17 @@ def backup_incremental(config, baseline_backup, group, name):
             tls_config=cfg.get("tls"),
         )
 
+        ops_database = config_module.get_ops_database(cfg)
+
         with database:
-            was_created = schema.ensure_ops_schema(database)
+            was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
             if was_created:
                 logger.warning(
                     "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
                 )
-                logger.warning("Remember to populate ops.table_inventory with your backup groups!")
+                logger.warning(
+                    "Remember to populate the table_inventory table with your backup groups!"
+                )
                 sys.exit(1)  # Exit if schema was just created, requires user action
 
             healthy, message = health.check_cluster_health(database)
@@ -221,6 +249,7 @@ def backup_incremental(config, baseline_backup, group, name):
                 backup_type="incremental",
                 database_name=cfg["database"],
                 custom_name=name,
+                ops_database=ops_database,
             )
 
             logger.success(f"Generated label: {label}")
@@ -239,7 +268,11 @@ def backup_incremental(config, baseline_backup, group, name):
             )
 
             partitions = planner.find_recent_partitions(
-                database, cfg["database"], baseline_backup_label=baseline_backup, group_name=group
+                database,
+                cfg["database"],
+                baseline_backup_label=baseline_backup,
+                group_name=group,
+                ops_database=ops_database,
             )
 
             if not partitions:
@@ -252,9 +285,11 @@ def backup_incremental(config, baseline_backup, group, name):
                 partitions, cfg["repository"], label, cfg["database"]
             )
 
-            concurrency.reserve_job_slot(database, scope="backup", label=label)
+            concurrency.reserve_job_slot(
+                database, scope="backup", label=label, ops_database=ops_database
+            )
 
-            planner.record_backup_partitions(database, label, partitions)
+            planner.record_backup_partitions(database, label, partitions, ops_database=ops_database)
 
             logger.success("Job slot reserved")
             logger.info(f"Starting incremental backup for group '{group}'...")
@@ -265,6 +300,7 @@ def backup_incremental(config, baseline_backup, group, name):
                 backup_type="incremental",
                 scope="backup",
                 database=cfg["database"],
+                ops_database=ops_database,
             )
 
             if result["success"]:
@@ -288,7 +324,7 @@ def backup_incremental(config, baseline_backup, group, name):
                 if state == "LOST":
                     logger.critical("Backup tracking lost!")
                     logger.warning("Another backup operation started during ours.")
-                    logger.tip("Enable ops.run_status concurrency checks to prevent this.")
+                    logger.tip("Enable run_status concurrency checks to prevent this.")
                 logger.error(f"{result['error_message']}")
                 sys.exit(1)
 
@@ -348,13 +384,17 @@ def backup_full(config, group, name):
             tls_config=cfg.get("tls"),
         )
 
+        ops_database = config_module.get_ops_database(cfg)
+
         with database:
-            was_created = schema.ensure_ops_schema(database)
+            was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
             if was_created:
                 logger.warning(
                     "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
                 )
-                logger.warning("Remember to populate ops.table_inventory with your backup groups!")
+                logger.warning(
+                    "Remember to populate the table_inventory table with your backup groups!"
+                )
                 sys.exit(1)  # Exit if schema was just created, requires user action
 
             healthy, message = health.check_cluster_health(database)
@@ -369,13 +409,25 @@ def backup_full(config, group, name):
             logger.success(f"Repository '{cfg['repository']}' verified")
 
             label = labels.determine_backup_label(
-                db=database, backup_type="full", database_name=cfg["database"], custom_name=name
+                db=database,
+                backup_type="full",
+                database_name=cfg["database"],
+                custom_name=name,
+                ops_database=ops_database,
             )
 
             logger.success(f"Generated label: {label}")
 
+            tables = planner.find_tables_by_group(database, group, ops_database)
+            planner.validate_tables_exist(database, cfg["database"], tables, group)
+
             backup_command = planner.build_full_backup_command(
-                database, group, cfg["repository"], label, cfg["database"]
+                database,
+                group,
+                cfg["repository"],
+                label,
+                cfg["database"],
+                ops_database=ops_database,
             )
 
             if not backup_command:
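
The new `find_tables_by_group` / `validate_tables_exist` pre-flight pair is what feeds the `InvalidTablesInInventoryError` handler added further down in this diff. An illustrative sketch of the validation step, assuming inventory entries may use the '*' wildcard shown in the init hints and that the exception accepts the group and the missing tables (the packaged starrocks_br/planner.py may differ):

    # Illustrative only: reject inventory entries that name nonexistent tables.
    def validate_tables_exist(db, database_name, tables, group):
        existing = {row[0] for row in db.query(f"SHOW TABLES FROM {database_name}")}
        missing = [t for t in tables if t != "*" and t not in existing]
        if missing:
            raise exceptions.InvalidTablesInInventoryError(group, missing)
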
@@ -389,9 +441,13 @@ def backup_full(config, group, name):
                 database, cfg["database"], tables
             )
 
-            concurrency.reserve_job_slot(database, scope="backup", label=label)
+            concurrency.reserve_job_slot(
+                database, scope="backup", label=label, ops_database=ops_database
+            )
 
-            planner.record_backup_partitions(database, label, all_partitions)
+            planner.record_backup_partitions(
+                database, label, all_partitions, ops_database=ops_database
+            )
 
             logger.success("Job slot reserved")
             logger.info(f"Starting full backup for group '{group}'...")
@@ -402,6 +458,7 @@ def backup_full(config, group, name):
                 backup_type="full",
                 scope="backup",
                 database=cfg["database"],
+                ops_database=ops_database,
             )
 
             if result["success"]:
419
476
  if state == "LOST":
420
477
  logger.critical("Backup tracking lost!")
421
478
  logger.warning("Another backup operation started during ours.")
422
- logger.tip("Enable ops.run_status concurrency checks to prevent this.")
479
+ logger.tip("Enable run_status concurrency checks to prevent this.")
423
480
  logger.error(f"{result['error_message']}")
424
481
  sys.exit(1)
425
482
 
483
+ except exceptions.InvalidTablesInInventoryError as e:
484
+ error_handler.handle_invalid_tables_in_inventory_error(e, config)
485
+ sys.exit(1)
426
486
  except exceptions.ConcurrencyConflictError as e:
427
487
  error_handler.handle_concurrency_conflict_error(e, config)
428
488
  sys.exit(1)
@@ -499,13 +559,17 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
             tls_config=cfg.get("tls"),
         )
 
+        ops_database = config_module.get_ops_database(cfg)
+
         with database:
-            was_created = schema.ensure_ops_schema(database)
+            was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
             if was_created:
                 logger.warning(
                     "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
                 )
-                logger.warning("Remember to populate ops.table_inventory with your backup groups!")
+                logger.warning(
+                    "Remember to populate the table_inventory table with your backup groups!"
+                )
                 sys.exit(1)  # Exit if schema was just created, requires user action
 
             healthy, message = health.check_cluster_health(database)
@@ -521,7 +585,9 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
 
             logger.info(f"Finding restore sequence for target backup: {target_label}")
 
-            restore_pair = restore.find_restore_pair(database, target_label)
+            restore_pair = restore.find_restore_pair(
+                database, target_label, ops_database=ops_database
+            )
             logger.success(f"Found restore sequence: {' -> '.join(restore_pair)}")
 
             logger.info("Determining tables to restore from backup manifest...")
@@ -532,6 +598,7 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
                 group=group,
                 table=table,
                 database=cfg["database"] if table else None,
+                ops_database=ops_database,
             )
 
             if not tables_to_restore:
@@ -549,6 +616,7 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
                 tables_to_restore,
                 rename_suffix,
                 skip_confirmation=yes,
+                ops_database=ops_database,
             )
 
             if result["success"]:
@@ -604,5 +672,209 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
         sys.exit(1)
 
 
+@cli.command("prune")
+@click.option("--config", required=True, help="Path to config YAML file")
+@click.option(
+    "--group",
+    help="Optional inventory group to filter backups. Without this, prunes ALL backups.",
+)
+@click.option(
+    "--keep-last",
+    type=int,
+    help="Keep only the last N successful backups (deletes older ones)",
+)
+@click.option(
+    "--older-than",
+    help="Delete snapshots older than this timestamp (format: YYYY-MM-DD HH:MM:SS)",
+)
+@click.option("--snapshot", help="Delete a specific snapshot by name")
+@click.option("--snapshots", help="Delete multiple specific snapshots (comma-separated)")
+@click.option(
+    "--dry-run",
+    is_flag=True,
+    help="Show what would be deleted without actually deleting",
+)
+@click.option("--yes", is_flag=True, help="Skip confirmation prompt and proceed automatically")
+def prune_command(config, group, keep_last, older_than, snapshot, snapshots, dry_run, yes):
+    """Prune (delete) old backup snapshots from the repository.
+
+    This command helps manage repository storage by removing old or unwanted snapshots.
+    Supports multiple pruning strategies:
+    - Keep only the last N backups
+    - Delete backups older than a specific date
+    - Delete specific snapshots by name
+
+    Flow: load config → check health → ensure repository → query backups →
+    filter snapshots to delete → confirm → execute deletion → cleanup history
+    """
+    try:
+        pruning_options = [keep_last, older_than, snapshot, snapshots]
+        specified_options = [opt for opt in pruning_options if opt is not None]
+
+        if not specified_options:
+            logger.error(
+                "Must specify one pruning option: --keep-last, --older-than, --snapshot, or --snapshots"
+            )
+            sys.exit(1)
+
+        if len(specified_options) > 1:
+            logger.error(
+                "Pruning options are mutually exclusive. "
+                "Please specify only one of: --keep-last, --older-than, --snapshot, or --snapshots"
+            )
+            sys.exit(1)
+
+        if keep_last is not None and keep_last <= 0:
+            logger.error("--keep-last must be a positive number (greater than 0)")
+            sys.exit(1)
+
+        cfg = config_module.load_config(config)
+        config_module.validate_config(cfg)
+
+        database = db.StarRocksDB(
+            host=cfg["host"],
+            port=cfg["port"],
+            user=cfg["user"],
+            password=os.getenv("STARROCKS_PASSWORD"),
+            database=cfg["database"],
+            tls_config=cfg.get("tls"),
+        )
+
+        ops_database = config_module.get_ops_database(cfg)
+
+        with database:
+            was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
+            if was_created:
+                logger.warning(
+                    "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
+                )
+                sys.exit(1)
+
+            healthy, message = health.check_cluster_health(database)
+            if not healthy:
+                logger.error(f"Cluster health check failed: {message}")
+                sys.exit(1)
+
+            logger.success(f"Cluster health: {message}")
+
+            repository.ensure_repository(database, cfg["repository"])
+            logger.success(f"Repository '{cfg['repository']}' verified")
+
+            if keep_last:
+                strategy = "keep_last"
+                strategy_kwargs = {"count": keep_last}
+                logger.info(f"Pruning strategy: Keep last {keep_last} backup(s)")
+            elif older_than:
+                strategy = "older_than"
+                strategy_kwargs = {"timestamp": older_than}
+                logger.info(f"Pruning strategy: Delete backups older than {older_than}")
+            elif snapshot:
+                strategy = "specific"
+                strategy_kwargs = {"snapshot": snapshot}
+                logger.info(f"Pruning strategy: Delete specific snapshot '{snapshot}'")
+            elif snapshots:
+                strategy = "multiple"
+                snapshot_list = [s.strip() for s in snapshots.split(",")]
+                strategy_kwargs = {"snapshots": snapshot_list}
+                logger.info(f"Pruning strategy: Delete {len(snapshot_list)} specific snapshot(s)")
+
+            if group:
+                logger.info(f"Filtering by inventory group: {group}")
+
+            all_backups = prune.get_successful_backups(
+                database, cfg["repository"], group=group, ops_database=ops_database
+            )
+
+            if not all_backups:
+                msg = f"No successful backups found in repository '{cfg['repository']}'"
+                if group:
+                    msg += f" for inventory group '{group}'"
+                logger.info(msg)
+                sys.exit(0)
+
+            logger.info(f"Found {len(all_backups)} total backup(s) in repository")
+
+            if strategy in ["specific", "multiple"]:
+                snapshots_to_verify = (
+                    [snapshot] if strategy == "specific" else strategy_kwargs["snapshots"]
+                )
+                for snap in snapshots_to_verify:
+                    prune.verify_snapshot_exists(database, cfg["repository"], snap)
+
+            snapshots_to_delete = prune.filter_snapshots_to_delete(
+                all_backups, strategy, **strategy_kwargs
+            )
+
+            if not snapshots_to_delete:
+                logger.success("No snapshots to delete based on the specified criteria")
+                sys.exit(0)
+
+            logger.info("")
+            logger.info(f"Snapshots to delete: {len(snapshots_to_delete)}")
+            for snap in snapshots_to_delete:
+                logger.info(f"  - {snap['label']} (finished: {snap['finished_at']})")
+
+            if keep_last:
+                kept_count = len(all_backups) - len(snapshots_to_delete)
+                logger.info(f"Snapshots to keep: {kept_count} (most recent)")
+
+            if dry_run:
+                logger.info("")
+                logger.warning("DRY RUN MODE - No snapshots will be deleted")
+                logger.info(f"Would delete {len(snapshots_to_delete)} snapshot(s)")
+                sys.exit(0)
+
+            if not yes:
+                logger.info("")
+                logger.warning(
+                    f"This will permanently delete {len(snapshots_to_delete)} snapshot(s) from the repository"
+                )
+                confirm = click.confirm("Do you want to proceed?", default=False)
+                if not confirm:
+                    logger.info("Prune operation cancelled by user")
+                    sys.exit(1)
+
+            logger.info("")
+            logger.info("Starting snapshot deletion...")
+            deleted_count = 0
+            failed_count = 0
+
+            for snap in snapshots_to_delete:
+                try:
+                    prune.execute_drop_snapshot(database, cfg["repository"], snap["label"])
+                    prune.cleanup_backup_history(database, snap["label"], ops_database=ops_database)
+                    deleted_count += 1
+                except Exception as e:
+                    logger.error(f"Failed to delete snapshot '{snap['label']}': {e}")
+                    failed_count += 1
+
+            logger.info("")
+            logger.success(f"Deleted {deleted_count} snapshot(s)")
+
+            if failed_count > 0:
+                logger.warning(f"Failed to delete {failed_count} snapshot(s)")
+
+            if keep_last:
+                logger.success(f"Kept {len(all_backups) - len(snapshots_to_delete)} most recent backup(s)")
+
+            sys.exit(0 if failed_count == 0 else 1)
+
+    except exceptions.ConfigFileNotFoundError as e:
+        error_handler.handle_config_file_not_found_error(e)
+        sys.exit(1)
+    except exceptions.ConfigValidationError as e:
+        error_handler.handle_config_validation_error(e, config)
+        sys.exit(1)
+    except FileNotFoundError as e:
+        error_handler.handle_config_file_not_found_error(exceptions.ConfigFileNotFoundError(str(e)))
+        sys.exit(1)
+    except ValueError as e:
+        logger.error(f"Validation error: {e}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Unexpected error: {e}")
+        sys.exit(1)
+
+
 if __name__ == "__main__":
     cli()
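
The four mutually exclusive strategies above all funnel into `prune.filter_snapshots_to_delete(all_backups, strategy, **strategy_kwargs)`. A sketch of the selection semantics implied by the CLI wiring, assuming each backup row is a dict with a `label` and a `finished_at` datetime (the packaged starrocks_br/prune.py may differ):

    from datetime import datetime

    # Illustrative re-implementation of the selection logic; not the packaged code.
    def filter_snapshots_to_delete(all_backups, strategy, **kwargs):
        if strategy == "keep_last":
            # Sort oldest-first and drop everything except the newest `count`.
            ordered = sorted(all_backups, key=lambda b: b["finished_at"])
            return ordered[: max(len(ordered) - kwargs["count"], 0)]
        if strategy == "older_than":
            cutoff = datetime.strptime(kwargs["timestamp"], "%Y-%m-%d %H:%M:%S")
            return [b for b in all_backups if b["finished_at"] < cutoff]
        if strategy == "specific":
            return [b for b in all_backups if b["label"] == kwargs["snapshot"]]
        if strategy == "multiple":
            wanted = set(kwargs["snapshots"])
            return [b for b in all_backups if b["label"] in wanted]
        raise ValueError(f"Unknown strategy: {strategy}")

A typical retention sweep would be previewed with `starrocks-br prune --config config.yaml --keep-last 7 --dry-run`, then executed by replacing `--dry-run` with `--yes`.
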
starrocks_br/concurrency.py CHANGED
@@ -17,45 +17,51 @@ from typing import Literal
 from . import exceptions, logger, utils
 
 
-def reserve_job_slot(db, scope: str, label: str) -> None:
-    """Reserve a job slot in ops.run_status to prevent overlapping jobs.
+def reserve_job_slot(db, scope: str, label: str, ops_database: str = "ops") -> None:
+    """Reserve a job slot in the run_status table to prevent overlapping jobs.
 
     We consider any row with state='ACTIVE' for the same scope as a conflict.
     However, we implement self-healing logic to automatically clean up stale locks.
     """
-    active_jobs = _get_active_jobs_for_scope(db, scope)
+    active_jobs = _get_active_jobs_for_scope(db, scope, ops_database)
 
     if not active_jobs:
-        _insert_new_job(db, scope, label)
+        _insert_new_job(db, scope, label, ops_database)
         return
 
-    _handle_active_job_conflicts(db, scope, active_jobs)
+    _handle_active_job_conflicts(db, scope, active_jobs, ops_database)
 
-    _insert_new_job(db, scope, label)
+    _insert_new_job(db, scope, label, ops_database)
 
 
-def _get_active_jobs_for_scope(db, scope: str) -> list[tuple[str, str, str]]:
+def _get_active_jobs_for_scope(
+    db, scope: str, ops_database: str = "ops"
+) -> list[tuple[str, str, str]]:
     """Get all active jobs for the given scope."""
-    rows = db.query("SELECT scope, label, state FROM ops.run_status WHERE state = 'ACTIVE'")
+    rows = db.query(
+        f"SELECT scope, label, state FROM {ops_database}.run_status WHERE state = 'ACTIVE'"
+    )
     return [row for row in rows if row[0] == scope]
 
 
-def _handle_active_job_conflicts(db, scope: str, active_jobs: list[tuple[str, str, str]]) -> None:
+def _handle_active_job_conflicts(
+    db, scope: str, active_jobs: list[tuple[str, str, str]], ops_database: str = "ops"
+) -> None:
     """Handle conflicts with active jobs, cleaning up stale ones where possible."""
     for active_scope, active_label, _ in active_jobs:
-        if _can_heal_stale_job(active_scope, active_label, db):
-            _cleanup_stale_job(db, active_scope, active_label)
+        if _can_heal_stale_job(active_scope, active_label, db, ops_database):
+            _cleanup_stale_job(db, active_scope, active_label, ops_database)
             logger.success(f"Cleaned up stale backup job: {active_label}")
         else:
             _raise_concurrency_conflict(scope, active_jobs)
 
 
-def _can_heal_stale_job(scope: str, label: str, db) -> bool:
+def _can_heal_stale_job(scope: str, label: str, db, ops_database: str = "ops") -> bool:
     """Check if a stale job can be healed (only for backup jobs)."""
     if scope != "backup":
         return False
 
-    return _is_backup_job_stale(db, label)
+    return _is_backup_job_stale(db, label, ops_database)
 
 
 def _raise_concurrency_conflict(scope: str, active_jobs: list[tuple[str, str, str]]) -> None:
@@ -63,22 +69,22 @@ def _raise_concurrency_conflict(scope: str, active_jobs: list[tuple[str, str, st
     raise exceptions.ConcurrencyConflictError(scope, active_jobs)
 
 
-def _insert_new_job(db, scope: str, label: str) -> None:
+def _insert_new_job(db, scope: str, label: str, ops_database: str = "ops") -> None:
     """Insert a new active job record."""
     sql = f"""
-        INSERT INTO ops.run_status (scope, label, state, started_at)
+        INSERT INTO {ops_database}.run_status (scope, label, state, started_at)
         VALUES ({utils.quote_value(scope)}, {utils.quote_value(label)}, 'ACTIVE', NOW())
     """
     db.execute(sql)
 
 
-def _is_backup_job_stale(db, label: str) -> bool:
+def _is_backup_job_stale(db, label: str, ops_database: str = "ops") -> bool:
     """Check if a backup job is stale by querying StarRocks SHOW BACKUP.
 
     Returns True if the job is stale (not actually running), False if it's still active.
     """
     try:
-        user_databases = _get_user_databases(db)
+        user_databases = _get_user_databases(db, ops_database)
 
         for database_name in user_databases:
             job_status = _check_backup_job_in_database(db, database_name, label)
@@ -98,9 +104,9 @@ def _is_backup_job_stale(db, label: str) -> bool:
     return False
 
 
-def _get_user_databases(db) -> list[str]:
+def _get_user_databases(db, ops_database: str = "ops") -> list[str]:
     """Get list of user databases (excluding system databases)."""
-    system_databases = {"information_schema", "mysql", "sys", "ops"}
+    system_databases = {"information_schema", "mysql", "sys", ops_database}
 
     databases = db.query("SHOW DATABASES")
     return [
@@ -159,10 +165,10 @@ def _extract_backup_info(result) -> tuple[str, str]:
     return snapshot_name, state
 
 
-def _cleanup_stale_job(db, scope: str, label: str) -> None:
+def _cleanup_stale_job(db, scope: str, label: str, ops_database: str = "ops") -> None:
    """Clean up a stale job by updating its state to CANCELLED."""
     sql = f"""
-        UPDATE ops.run_status
+        UPDATE {ops_database}.run_status
         SET state='CANCELLED', finished_at=NOW()
         WHERE scope={utils.quote_value(scope)} AND label={utils.quote_value(label)} AND state='ACTIVE'
     """
@@ -170,14 +176,18 @@ def _cleanup_stale_job(db, scope: str, label: str) -> None:
 
 
 def complete_job_slot(
-    db, scope: str, label: str, final_state: Literal["FINISHED", "FAILED", "CANCELLED"]
+    db,
+    scope: str,
+    label: str,
+    final_state: Literal["FINISHED", "FAILED", "CANCELLED"],
+    ops_database: str = "ops",
 ) -> None:
     """Complete job slot and persist final state.
 
     Simple approach: update the same row by scope/label.
     """
     sql = f"""
-        UPDATE ops.run_status
+        UPDATE {ops_database}.run_status
         SET state={utils.quote_value(final_state)}, finished_at=NOW()
         WHERE scope={utils.quote_value(scope)} AND label={utils.quote_value(label)}
     """