starrocks-br 0.5.2__py3-none-any.whl → 0.7.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/cli.py +308 -36
- starrocks_br/concurrency.py +33 -23
- starrocks_br/config.py +75 -0
- starrocks_br/error_handler.py +59 -12
- starrocks_br/exceptions.py +14 -0
- starrocks_br/executor.py +9 -2
- starrocks_br/history.py +9 -9
- starrocks_br/labels.py +5 -3
- starrocks_br/planner.py +56 -13
- starrocks_br/prune.py +208 -0
- starrocks_br/repository.py +1 -1
- starrocks_br/restore.py +197 -40
- starrocks_br/schema.py +89 -43
- {starrocks_br-0.5.2.dist-info → starrocks_br-0.7.0a1.dist-info}/METADATA +25 -2
- starrocks_br-0.7.0a1.dist-info/RECORD +25 -0
- {starrocks_br-0.5.2.dist-info → starrocks_br-0.7.0a1.dist-info}/WHEEL +1 -1
- starrocks_br-0.5.2.dist-info/RECORD +0 -24
- {starrocks_br-0.5.2.dist-info → starrocks_br-0.7.0a1.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.5.2.dist-info → starrocks_br-0.7.0a1.dist-info}/licenses/LICENSE +0 -0
- {starrocks_br-0.5.2.dist-info → starrocks_br-0.7.0a1.dist-info}/top_level.txt +0 -0
starrocks_br/cli.py
CHANGED
@@ -27,6 +27,7 @@ from . import (
     labels,
     logger,
     planner,
+    prune,
     repository,
     restore,
     schema,
@@ -98,13 +99,13 @@ def cli(ctx, verbose):
 @cli.command("init")
 @click.option("--config", required=True, help="Path to config YAML file")
 def init(config):
-    """Initialize
+    """Initialize operations database and control tables.
 
-    Creates the
-    -
-    -
-    -
-    -
+    Creates the operations database (default: 'ops') with required tables:
+    - table_inventory: Inventory groups mapping to databases/tables
+    - backup_history: Backup operation history
+    - restore_history: Restore operation history
+    - run_status: Job concurrency control
 
     Run this once before using backup/restore commands.
     """
@@ -112,6 +113,9 @@ def init(config):
     cfg = config_module.load_config(config)
     config_module.validate_config(cfg)
 
+    ops_database = config_module.get_ops_database(cfg)
+    table_inventory_entries = config_module.get_table_inventory_entries(cfg)
+
     database = db.StarRocksDB(
         host=cfg["host"],
         port=cfg["port"],
@@ -121,23 +125,43 @@ def init(config):
         tls_config=cfg.get("tls"),
     )
 
+    ops_database = config_module.get_ops_database(cfg)
+
     with database:
-        logger.info("
-
-        logger.info("")
-        logger.info("Next steps:")
-        logger.info("1. Insert your table inventory records:")
-        logger.info("   INSERT INTO ops.table_inventory")
-        logger.info("   (inventory_group, database_name, table_name)")
-        logger.info("   VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
-        logger.info("   VALUES ('my_full_database_backup', 'your_db', '*');")
-        logger.info("   VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
-        logger.info("   VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
+        logger.info("Validating repository...")
+        repository.ensure_repository(database, cfg["repository"])
         logger.info("")
-
-        logger.info(
-
+
+        logger.info("Initializing ops schema...")
+        schema.initialize_ops_schema(
+            database, ops_database=ops_database, table_inventory_entries=table_inventory_entries
         )
+        logger.info("")
+
+        if table_inventory_entries:
+            logger.success(
+                f"Table inventory bootstrapped from config with {len(table_inventory_entries)} entries"
+            )
+            logger.info("")
+            logger.info("Next steps:")
+            logger.info("1. Run your first backup:")
+            logger.info(
+                f"   starrocks-br backup incremental --group <your_group_name> --config {config}"
+            )
+        else:
+            logger.info("Next steps:")
+            logger.info("1. Insert your table inventory records:")
+            logger.info(f"   INSERT INTO {ops_database}.table_inventory")
+            logger.info("   (inventory_group, database_name, table_name)")
+            logger.info("   VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
+            logger.info("   VALUES ('my_full_database_backup', 'your_db', '*');")
+            logger.info("   VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
+            logger.info("   VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
+            logger.info("")
+            logger.info("2. Run your first backup:")
+            logger.info(
+                "   starrocks-br backup incremental --group my_daily_incremental --config config.yaml"
+            )
 
     except exceptions.ConfigFileNotFoundError as e:
         error_handler.handle_config_file_not_found_error(e)
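Note: config.py gains 75 lines in this release, but its hunks are not expanded in this diff, so the shape of the new helpers is only implied. As a minimal sketch, the `get_ops_database` helper the CLI now threads everywhere presumably resolves an optional config key and falls back to the `"ops"` default visible in the concurrency.py signatures below; the YAML key names here are assumptions:

    # Hypothetical sketch of the starrocks_br/config.py helpers added in 0.7.0a1.
    # Only the function names, the 'ops' default, and the entry fields
    # (inventory_group, database_name, table_name) are confirmed by this diff;
    # the config key names are assumptions.
    def get_ops_database(cfg: dict) -> str:
        """Return the database used for control tables, defaulting to 'ops'."""
        return cfg.get("ops_database", "ops")  # assumed key name

    def get_table_inventory_entries(cfg: dict) -> list[dict]:
        """Return inventory rows declared in the config, if any."""
        # Each entry maps an inventory_group to a database/table pair, matching
        # the table_inventory columns shown in the init output above.
        return cfg.get("table_inventory", [])  # assumed key name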
@@ -196,13 +220,17 @@ def backup_incremental(config, baseline_backup, group, name):
         tls_config=cfg.get("tls"),
     )
 
+    ops_database = config_module.get_ops_database(cfg)
+
     with database:
-        was_created = schema.ensure_ops_schema(database)
+        was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
         if was_created:
             logger.warning(
                 "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
             )
-            logger.warning(
+            logger.warning(
+                "Remember to populate the table_inventory table with your backup groups!"
+            )
             sys.exit(1)  # Exit if schema was just created, requires user action
 
         healthy, message = health.check_cluster_health(database)
@@ -221,6 +249,7 @@ def backup_incremental(config, baseline_backup, group, name):
             backup_type="incremental",
             database_name=cfg["database"],
             custom_name=name,
+            ops_database=ops_database,
         )
 
         logger.success(f"Generated label: {label}")
@@ -239,7 +268,11 @@ def backup_incremental(config, baseline_backup, group, name):
         )
 
         partitions = planner.find_recent_partitions(
-            database,
+            database,
+            cfg["database"],
+            baseline_backup_label=baseline_backup,
+            group_name=group,
+            ops_database=ops_database,
         )
 
         if not partitions:
@@ -252,9 +285,11 @@ def backup_incremental(config, baseline_backup, group, name):
             partitions, cfg["repository"], label, cfg["database"]
         )
 
-        concurrency.reserve_job_slot(
+        concurrency.reserve_job_slot(
+            database, scope="backup", label=label, ops_database=ops_database
+        )
 
-        planner.record_backup_partitions(database, label, partitions)
+        planner.record_backup_partitions(database, label, partitions, ops_database=ops_database)
 
         logger.success("Job slot reserved")
         logger.info(f"Starting incremental backup for group '{group}'...")
@@ -265,6 +300,7 @@ def backup_incremental(config, baseline_backup, group, name):
             backup_type="incremental",
             scope="backup",
             database=cfg["database"],
+            ops_database=ops_database,
         )
 
         if result["success"]:
@@ -288,7 +324,7 @@ def backup_incremental(config, baseline_backup, group, name):
         if state == "LOST":
             logger.critical("Backup tracking lost!")
             logger.warning("Another backup operation started during ours.")
-            logger.tip("Enable
+            logger.tip("Enable run_status concurrency checks to prevent this.")
             logger.error(f"{result['error_message']}")
             sys.exit(1)
 
@@ -348,13 +384,17 @@ def backup_full(config, group, name):
         tls_config=cfg.get("tls"),
     )
 
+    ops_database = config_module.get_ops_database(cfg)
+
     with database:
-        was_created = schema.ensure_ops_schema(database)
+        was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
         if was_created:
             logger.warning(
                 "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
             )
-            logger.warning(
+            logger.warning(
+                "Remember to populate the table_inventory table with your backup groups!"
+            )
             sys.exit(1)  # Exit if schema was just created, requires user action
 
         healthy, message = health.check_cluster_health(database)
@@ -369,13 +409,25 @@ def backup_full(config, group, name):
         logger.success(f"Repository '{cfg['repository']}' verified")
 
         label = labels.determine_backup_label(
-            db=database,
+            db=database,
+            backup_type="full",
+            database_name=cfg["database"],
+            custom_name=name,
+            ops_database=ops_database,
         )
 
         logger.success(f"Generated label: {label}")
 
+        tables = planner.find_tables_by_group(database, group, ops_database)
+        planner.validate_tables_exist(database, cfg["database"], tables, group)
+
         backup_command = planner.build_full_backup_command(
-            database,
+            database,
+            group,
+            cfg["repository"],
+            label,
+            cfg["database"],
+            ops_database=ops_database,
         )
 
         if not backup_command:
@@ -389,9 +441,13 @@ def backup_full(config, group, name):
             database, cfg["database"], tables
         )
 
-        concurrency.reserve_job_slot(
+        concurrency.reserve_job_slot(
+            database, scope="backup", label=label, ops_database=ops_database
+        )
 
-        planner.record_backup_partitions(
+        planner.record_backup_partitions(
+            database, label, all_partitions, ops_database=ops_database
+        )
 
         logger.success("Job slot reserved")
         logger.info(f"Starting full backup for group '{group}'...")
@@ -402,6 +458,7 @@ def backup_full(config, group, name):
             backup_type="full",
             scope="backup",
             database=cfg["database"],
+            ops_database=ops_database,
        )
 
         if result["success"]:
@@ -419,10 +476,13 @@ def backup_full(config, group, name):
         if state == "LOST":
             logger.critical("Backup tracking lost!")
             logger.warning("Another backup operation started during ours.")
-            logger.tip("Enable
+            logger.tip("Enable run_status concurrency checks to prevent this.")
             logger.error(f"{result['error_message']}")
             sys.exit(1)
 
+    except exceptions.InvalidTablesInInventoryError as e:
+        error_handler.handle_invalid_tables_in_inventory_error(e, config)
+        sys.exit(1)
     except exceptions.ConcurrencyConflictError as e:
         error_handler.handle_concurrency_conflict_error(e, config)
         sys.exit(1)
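The full-backup path now resolves the group's tables up front and validates them against the target database before building the BACKUP command, which is what surfaces the new `InvalidTablesInInventoryError` handled above. A minimal sketch of that step, with hypothetical group and database names (planner internals are not shown in this diff, so the raising behavior is inferred from the new except clause):

    # Illustrative only; group and database names are placeholders.
    tables = planner.find_tables_by_group(database, "my_full_dimension_tables", "ops")
    # Presumably raises exceptions.InvalidTablesInInventoryError when inventory
    # rows reference tables that do not exist in the target database.
    planner.validate_tables_exist(database, "your_db", tables, "my_full_dimension_tables")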
@@ -499,13 +559,17 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
         tls_config=cfg.get("tls"),
     )
 
+    ops_database = config_module.get_ops_database(cfg)
+
     with database:
-        was_created = schema.ensure_ops_schema(database)
+        was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
         if was_created:
             logger.warning(
                 "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
             )
-            logger.warning(
+            logger.warning(
+                "Remember to populate the table_inventory table with your backup groups!"
+            )
             sys.exit(1)  # Exit if schema was just created, requires user action
 
         healthy, message = health.check_cluster_health(database)
@@ -521,7 +585,9 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
 
         logger.info(f"Finding restore sequence for target backup: {target_label}")
 
-        restore_pair = restore.find_restore_pair(
+        restore_pair = restore.find_restore_pair(
+            database, target_label, ops_database=ops_database
+        )
         logger.success(f"Found restore sequence: {' -> '.join(restore_pair)}")
 
         logger.info("Determining tables to restore from backup manifest...")
@@ -532,6 +598,7 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
             group=group,
             table=table,
             database=cfg["database"] if table else None,
+            ops_database=ops_database,
         )
 
         if not tables_to_restore:
@@ -549,6 +616,7 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
             tables_to_restore,
             rename_suffix,
             skip_confirmation=yes,
+            ops_database=ops_database,
         )
 
         if result["success"]:
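For context, `find_restore_pair` resolves the target label into an ordered restore sequence; the `' -> '.join(restore_pair)` in the hunk above implies it returns an iterable of labels, e.g. a baseline full backup followed by the target incremental. A hedged sketch with made-up labels:

    # Illustrative only; label values are hypothetical.
    restore_pair = restore.find_restore_pair(
        database, "inc_20240102_030000", ops_database="ops"
    )
    print(" -> ".join(restore_pair))
    # e.g. "full_20240101_030000 -> inc_20240102_030000"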
@@ -604,5 +672,209 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
         sys.exit(1)
 
 
+@cli.command("prune")
+@click.option("--config", required=True, help="Path to config YAML file")
+@click.option(
+    "--group",
+    help="Optional inventory group to filter backups. Without this, prunes ALL backups.",
+)
+@click.option(
+    "--keep-last",
+    type=int,
+    help="Keep only the last N successful backups (deletes older ones)",
+)
+@click.option(
+    "--older-than",
+    help="Delete snapshots older than this timestamp (format: YYYY-MM-DD HH:MM:SS)",
+)
+@click.option("--snapshot", help="Delete a specific snapshot by name")
+@click.option("--snapshots", help="Delete multiple specific snapshots (comma-separated)")
+@click.option(
+    "--dry-run",
+    is_flag=True,
+    help="Show what would be deleted without actually deleting",
+)
+@click.option("--yes", is_flag=True, help="Skip confirmation prompt and proceed automatically")
+def prune_command(config, group, keep_last, older_than, snapshot, snapshots, dry_run, yes):
+    """Prune (delete) old backup snapshots from the repository.
+
+    This command helps manage repository storage by removing old or unwanted snapshots.
+    Supports multiple pruning strategies:
+    - Keep only the last N backups
+    - Delete backups older than a specific date
+    - Delete specific snapshots by name
+
+    Flow: load config → check health → ensure repository → query backups →
+    filter snapshots to delete → confirm → execute deletion → cleanup history
+    """
+    try:
+        pruning_options = [keep_last, older_than, snapshot, snapshots]
+        specified_options = [opt for opt in pruning_options if opt is not None]
+
+        if not specified_options:
+            logger.error(
+                "Must specify one pruning option: --keep-last, --older-than, --snapshot, or --snapshots"
+            )
+            sys.exit(1)
+
+        if len(specified_options) > 1:
+            logger.error(
+                "Pruning options are mutually exclusive. "
+                "Please specify only one of: --keep-last, --older-than, --snapshot, or --snapshots"
+            )
+            sys.exit(1)
+
+        if keep_last is not None and keep_last <= 0:
+            logger.error("--keep-last must be a positive number (greater than 0)")
+            sys.exit(1)
+
+        cfg = config_module.load_config(config)
+        config_module.validate_config(cfg)
+
+        database = db.StarRocksDB(
+            host=cfg["host"],
+            port=cfg["port"],
+            user=cfg["user"],
+            password=os.getenv("STARROCKS_PASSWORD"),
+            database=cfg["database"],
+            tls_config=cfg.get("tls"),
+        )
+
+        ops_database = config_module.get_ops_database(cfg)
+
+        with database:
+            was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
+            if was_created:
+                logger.warning(
+                    "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
+                )
+                sys.exit(1)
+
+            healthy, message = health.check_cluster_health(database)
+            if not healthy:
+                logger.error(f"Cluster health check failed: {message}")
+                sys.exit(1)
+
+            logger.success(f"Cluster health: {message}")
+
+            repository.ensure_repository(database, cfg["repository"])
+            logger.success(f"Repository '{cfg['repository']}' verified")
+
+            if keep_last:
+                strategy = "keep_last"
+                strategy_kwargs = {"count": keep_last}
+                logger.info(f"Pruning strategy: Keep last {keep_last} backup(s)")
+            elif older_than:
+                strategy = "older_than"
+                strategy_kwargs = {"timestamp": older_than}
+                logger.info(f"Pruning strategy: Delete backups older than {older_than}")
+            elif snapshot:
+                strategy = "specific"
+                strategy_kwargs = {"snapshot": snapshot}
+                logger.info(f"Pruning strategy: Delete specific snapshot '{snapshot}'")
+            elif snapshots:
+                strategy = "multiple"
+                snapshot_list = [s.strip() for s in snapshots.split(",")]
+                strategy_kwargs = {"snapshots": snapshot_list}
+                logger.info(f"Pruning strategy: Delete {len(snapshot_list)} specific snapshot(s)")
+
+            if group:
+                logger.info(f"Filtering by inventory group: {group}")
+
+            all_backups = prune.get_successful_backups(
+                database, cfg["repository"], group=group, ops_database=ops_database
+            )
+
+            if not all_backups:
+                msg = f"No successful backups found in repository '{cfg['repository']}'"
+                if group:
+                    msg += f" for inventory group '{group}'"
+                logger.info(msg)
+                sys.exit(0)
+
+            logger.info(f"Found {len(all_backups)} total backup(s) in repository")
+
+            if strategy in ["specific", "multiple"]:
+                snapshots_to_verify = (
+                    [snapshot] if strategy == "specific" else strategy_kwargs["snapshots"]
+                )
+                for snap in snapshots_to_verify:
+                    prune.verify_snapshot_exists(database, cfg["repository"], snap)
+
+            snapshots_to_delete = prune.filter_snapshots_to_delete(
+                all_backups, strategy, **strategy_kwargs
+            )
+
+            if not snapshots_to_delete:
+                logger.success("No snapshots to delete based on the specified criteria")
+                sys.exit(0)
+
+            logger.info("")
+            logger.info(f"Snapshots to delete: {len(snapshots_to_delete)}")
+            for snap in snapshots_to_delete:
+                logger.info(f"  - {snap['label']} (finished: {snap['finished_at']})")
+
+            if keep_last:
+                kept_count = len(all_backups) - len(snapshots_to_delete)
+                logger.info(f"Snapshots to keep: {kept_count} (most recent)")
+
+            if dry_run:
+                logger.info("")
+                logger.warning("DRY RUN MODE - No snapshots will be deleted")
+                logger.info(f"Would delete {len(snapshots_to_delete)} snapshot(s)")
+                sys.exit(0)
+
+            if not yes:
+                logger.info("")
+                logger.warning(
+                    f"This will permanently delete {len(snapshots_to_delete)} snapshot(s) from the repository"
+                )
+                confirm = click.confirm("Do you want to proceed?", default=False)
+                if not confirm:
+                    logger.info("Prune operation cancelled by user")
+                    sys.exit(1)
+
+            logger.info("")
+            logger.info("Starting snapshot deletion...")
+            deleted_count = 0
+            failed_count = 0
+
+            for snap in snapshots_to_delete:
+                try:
+                    prune.execute_drop_snapshot(database, cfg["repository"], snap["label"])
+                    prune.cleanup_backup_history(database, snap["label"], ops_database=ops_database)
+                    deleted_count += 1
+                except Exception as e:
+                    logger.error(f"Failed to delete snapshot '{snap['label']}': {e}")
+                    failed_count += 1
+
+            logger.info("")
+            logger.success(f"Deleted {deleted_count} snapshot(s)")
+
+            if failed_count > 0:
+                logger.warning(f"Failed to delete {failed_count} snapshot(s)")
+
+            if keep_last:
+                logger.success(f"Kept {len(all_backups) - len(snapshots_to_delete)} most recent backup(s)")
+
+            sys.exit(0 if failed_count == 0 else 1)
+
+    except exceptions.ConfigFileNotFoundError as e:
+        error_handler.handle_config_file_not_found_error(e)
+        sys.exit(1)
+    except exceptions.ConfigValidationError as e:
+        error_handler.handle_config_validation_error(e, config)
+        sys.exit(1)
+    except FileNotFoundError as e:
+        error_handler.handle_config_file_not_found_error(exceptions.ConfigFileNotFoundError(str(e)))
+        sys.exit(1)
+    except ValueError as e:
+        logger.error(f"Validation error: {e}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Unexpected error: {e}")
+        sys.exit(1)
+
+
 if __name__ == "__main__":
     cli()
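The options above make typical retention workflows one-liners. Example invocations, assembled from the flags documented in this hunk (group names, timestamps, and labels are placeholders):

    # Preview what a keep-last-7 policy would delete, without deleting anything
    starrocks-br prune --config config.yaml --keep-last 7 --dry-run

    # Delete snapshots older than a timestamp, for one inventory group, unattended
    starrocks-br prune --config config.yaml --group my_daily_incremental \
        --older-than "2024-01-01 00:00:00" --yes

    # Remove two specific snapshots by name
    starrocks-br prune --config config.yaml --snapshots "label_a,label_b"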
starrocks_br/concurrency.py
CHANGED
@@ -17,45 +17,51 @@ from typing import Literal
 from . import exceptions, logger, utils
 
 
-def reserve_job_slot(db, scope: str, label: str) -> None:
-    """Reserve a job slot in
+def reserve_job_slot(db, scope: str, label: str, ops_database: str = "ops") -> None:
+    """Reserve a job slot in the run_status table to prevent overlapping jobs.
 
     We consider any row with state='ACTIVE' for the same scope as a conflict.
     However, we implement self-healing logic to automatically clean up stale locks.
     """
-    active_jobs = _get_active_jobs_for_scope(db, scope)
+    active_jobs = _get_active_jobs_for_scope(db, scope, ops_database)
 
     if not active_jobs:
-        _insert_new_job(db, scope, label)
+        _insert_new_job(db, scope, label, ops_database)
         return
 
-    _handle_active_job_conflicts(db, scope, active_jobs)
+    _handle_active_job_conflicts(db, scope, active_jobs, ops_database)
 
-    _insert_new_job(db, scope, label)
+    _insert_new_job(db, scope, label, ops_database)
 
 
-def _get_active_jobs_for_scope(
+def _get_active_jobs_for_scope(
+    db, scope: str, ops_database: str = "ops"
+) -> list[tuple[str, str, str]]:
     """Get all active jobs for the given scope."""
-    rows = db.query(
+    rows = db.query(
+        f"SELECT scope, label, state FROM {ops_database}.run_status WHERE state = 'ACTIVE'"
+    )
     return [row for row in rows if row[0] == scope]
 
 
-def _handle_active_job_conflicts(
+def _handle_active_job_conflicts(
+    db, scope: str, active_jobs: list[tuple[str, str, str]], ops_database: str = "ops"
+) -> None:
     """Handle conflicts with active jobs, cleaning up stale ones where possible."""
     for active_scope, active_label, _ in active_jobs:
-        if _can_heal_stale_job(active_scope, active_label, db):
-            _cleanup_stale_job(db, active_scope, active_label)
+        if _can_heal_stale_job(active_scope, active_label, db, ops_database):
+            _cleanup_stale_job(db, active_scope, active_label, ops_database)
             logger.success(f"Cleaned up stale backup job: {active_label}")
         else:
             _raise_concurrency_conflict(scope, active_jobs)
 
 
-def _can_heal_stale_job(scope: str, label: str, db) -> bool:
+def _can_heal_stale_job(scope: str, label: str, db, ops_database: str = "ops") -> bool:
     """Check if a stale job can be healed (only for backup jobs)."""
     if scope != "backup":
         return False
 
-    return _is_backup_job_stale(db, label)
+    return _is_backup_job_stale(db, label, ops_database)
 
 
 def _raise_concurrency_conflict(scope: str, active_jobs: list[tuple[str, str, str]]) -> None:
@@ -63,22 +69,22 @@ def _raise_concurrency_conflict(scope: str, active_jobs: list[tuple[str, str, str]]) -> None:
     raise exceptions.ConcurrencyConflictError(scope, active_jobs)
 
 
-def _insert_new_job(db, scope: str, label: str) -> None:
+def _insert_new_job(db, scope: str, label: str, ops_database: str = "ops") -> None:
     """Insert a new active job record."""
     sql = f"""
-        INSERT INTO
+        INSERT INTO {ops_database}.run_status (scope, label, state, started_at)
         VALUES ({utils.quote_value(scope)}, {utils.quote_value(label)}, 'ACTIVE', NOW())
     """
     db.execute(sql)
 
 
-def _is_backup_job_stale(db, label: str) -> bool:
+def _is_backup_job_stale(db, label: str, ops_database: str = "ops") -> bool:
     """Check if a backup job is stale by querying StarRocks SHOW BACKUP.
 
     Returns True if the job is stale (not actually running), False if it's still active.
     """
     try:
-        user_databases = _get_user_databases(db)
+        user_databases = _get_user_databases(db, ops_database)
 
         for database_name in user_databases:
             job_status = _check_backup_job_in_database(db, database_name, label)
@@ -98,9 +104,9 @@ def _is_backup_job_stale(db, label: str) -> bool:
     return False
 
 
-def _get_user_databases(db) -> list[str]:
+def _get_user_databases(db, ops_database: str = "ops") -> list[str]:
     """Get list of user databases (excluding system databases)."""
-    system_databases = {"information_schema", "mysql", "sys",
+    system_databases = {"information_schema", "mysql", "sys", ops_database}
 
     databases = db.query("SHOW DATABASES")
     return [
@@ -159,10 +165,10 @@ def _extract_backup_info(result) -> tuple[str, str]:
     return snapshot_name, state
 
 
-def _cleanup_stale_job(db, scope: str, label: str) -> None:
+def _cleanup_stale_job(db, scope: str, label: str, ops_database: str = "ops") -> None:
     """Clean up a stale job by updating its state to CANCELLED."""
     sql = f"""
-        UPDATE
+        UPDATE {ops_database}.run_status
         SET state='CANCELLED', finished_at=NOW()
         WHERE scope={utils.quote_value(scope)} AND label={utils.quote_value(label)} AND state='ACTIVE'
     """
@@ -170,14 +176,18 @@ def _cleanup_stale_job(db, scope: str, label: str) -> None:
 
 
 def complete_job_slot(
-    db,
+    db,
+    scope: str,
+    label: str,
+    final_state: Literal["FINISHED", "FAILED", "CANCELLED"],
+    ops_database: str = "ops",
 ) -> None:
     """Complete job slot and persist final state.
 
     Simple approach: update the same row by scope/label.
     """
     sql = f"""
-        UPDATE
+        UPDATE {ops_database}.run_status
         SET state={utils.quote_value(final_state)}, finished_at=NOW()
         WHERE scope={utils.quote_value(scope)} AND label={utils.quote_value(label)}
     """
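Taken together, these statements imply the run_status table carries at least (scope, label, state, started_at, finished_at) columns, and that every slot follows a reserve → work → complete lifecycle. A minimal sketch of a caller, assuming only the public functions shown above:

    # Hypothetical caller; `db` is an open StarRocksDB connection and
    # do_backup_work is a placeholder for the actual backup step.
    from starrocks_br import concurrency, exceptions

    def run_with_job_slot(db, label: str, do_backup_work) -> None:
        try:
            # Inserts an ACTIVE run_status row; stale backup locks are self-healed first.
            concurrency.reserve_job_slot(db, scope="backup", label=label, ops_database="ops")
        except exceptions.ConcurrencyConflictError:
            raise  # another ACTIVE job holds the slot and is still running
        try:
            do_backup_work()
            concurrency.complete_job_slot(db, "backup", label, "FINISHED", ops_database="ops")
        except Exception:
            concurrency.complete_job_slot(db, "backup", label, "FAILED", ops_database="ops")
            raise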