starrocks-br 0.6.0__tar.gz → 0.7.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/PKG-INFO +11 -1
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/README.md +10 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/pyproject.toml +1 -1
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/cli.py +205 -0
- starrocks_br-0.7.0a1/src/starrocks_br/prune.py +208 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/PKG-INFO +11 -1
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/SOURCES.txt +3 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_init.py +1 -1
- starrocks_br-0.7.0a1/tests/test_prune.py +380 -0
- starrocks_br-0.7.0a1/tests/test_prune_cli.py +742 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/LICENSE +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/setup.cfg +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/__init__.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/concurrency.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/config.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/db.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/error_handler.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/exceptions.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/executor.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/health.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/history.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/labels.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/logger.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/planner.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/repository.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/restore.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/schema.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/timezone.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/utils.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/dependency_links.txt +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/entry_points.txt +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/requires.txt +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/top_level.txt +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_backup.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_exceptions.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_general.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_restore.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_concurrency.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_config.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_db.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_error_handler.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_executor.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_health_checks.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_history.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_labels.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_logger.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_planner.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_repository_sql.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_restore.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_schema_setup.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_timezone.py +0 -0
- {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: starrocks-br
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.7.0a1
|
|
4
4
|
Summary: StarRocks Backup and Restore automation tool
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -56,6 +56,7 @@ This tool adds **incremental backup capabilities** to StarRocks by leveraging na
|
|
|
56
56
|
- ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
|
|
57
57
|
- ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
|
|
58
58
|
- ✅ **Inventory groups** - Organize tables into groups with different backup strategies
|
|
59
|
+
- ✅ **Backup lifecycle management** - Prune old backups with flexible retention policies (keep-last, older-than, specific snapshots)
|
|
59
60
|
- ✅ **Job concurrency control** - Prevents conflicting operations
|
|
60
61
|
- ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
|
|
61
62
|
- ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
|
|
@@ -147,6 +148,15 @@ starrocks-br backup incremental --config config.yaml --group production
|
|
|
147
148
|
starrocks-br restore --config config.yaml --target-label mydb_20251118_full
|
|
148
149
|
```
|
|
149
150
|
|
|
151
|
+
**Prune old backups:**
|
|
152
|
+
```bash
|
|
153
|
+
# Keep only last 5 backups
|
|
154
|
+
starrocks-br prune --config config.yaml --keep-last 5
|
|
155
|
+
|
|
156
|
+
# Delete backups older than a date
|
|
157
|
+
starrocks-br prune --config config.yaml --older-than "2024-01-01 00:00:00"
|
|
158
|
+
```
|
|
159
|
+
|
|
150
160
|
See [Commands Reference](docs/commands.md) for all options.
|
|
151
161
|
|
|
152
162
|
## How It Works
|
|
@@ -38,6 +38,7 @@ This tool adds **incremental backup capabilities** to StarRocks by leveraging na
|
|
|
38
38
|
- ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
|
|
39
39
|
- ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
|
|
40
40
|
- ✅ **Inventory groups** - Organize tables into groups with different backup strategies
|
|
41
|
+
- ✅ **Backup lifecycle management** - Prune old backups with flexible retention policies (keep-last, older-than, specific snapshots)
|
|
41
42
|
- ✅ **Job concurrency control** - Prevents conflicting operations
|
|
42
43
|
- ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
|
|
43
44
|
- ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
|
|
@@ -129,6 +130,15 @@ starrocks-br backup incremental --config config.yaml --group production
|
|
|
129
130
|
starrocks-br restore --config config.yaml --target-label mydb_20251118_full
|
|
130
131
|
```
|
|
131
132
|
|
|
133
|
+
**Prune old backups:**
|
|
134
|
+
```bash
|
|
135
|
+
# Keep only last 5 backups
|
|
136
|
+
starrocks-br prune --config config.yaml --keep-last 5
|
|
137
|
+
|
|
138
|
+
# Delete backups older than a date
|
|
139
|
+
starrocks-br prune --config config.yaml --older-than "2024-01-01 00:00:00"
|
|
140
|
+
```
|
|
141
|
+
|
|
132
142
|
See [Commands Reference](docs/commands.md) for all options.
|
|
133
143
|
|
|
134
144
|
## How It Works
|
|
@@ -27,6 +27,7 @@ from . import (
|
|
|
27
27
|
labels,
|
|
28
28
|
logger,
|
|
29
29
|
planner,
|
|
30
|
+
prune,
|
|
30
31
|
repository,
|
|
31
32
|
restore,
|
|
32
33
|
schema,
|
|
@@ -671,5 +672,209 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
|
671
672
|
sys.exit(1)
|
|
672
673
|
|
|
673
674
|
|
|
675
|
+
def _select_prune_strategy(keep_last, older_than, snapshot, snapshots):
    """Translate the single supplied pruning option into a strategy.

    Options are tested with ``is not None`` (not truthiness) so that the
    selection always agrees with the "exactly one option" validation done by
    the caller; an empty string therefore still selects its strategy instead
    of silently falling through every branch.

    Returns:
        Tuple of (strategy name, kwargs for prune.filter_snapshots_to_delete,
        human-readable description used in the "Pruning strategy" log line).
    """
    if keep_last is not None:
        return "keep_last", {"count": keep_last}, f"Keep last {keep_last} backup(s)"
    if older_than is not None:
        return (
            "older_than",
            {"timestamp": older_than},
            f"Delete backups older than {older_than}",
        )
    if snapshot is not None:
        return "specific", {"snapshot": snapshot}, f"Delete specific snapshot '{snapshot}'"

    # Blank entries (e.g. "a,,b" or a trailing comma) are dropped so they are
    # never sent to the repository as an empty snapshot name.
    snapshot_list = [name.strip() for name in snapshots.split(",") if name.strip()]
    return (
        "multiple",
        {"snapshots": snapshot_list},
        f"Delete {len(snapshot_list)} specific snapshot(s)",
    )


@cli.command("prune")
@click.option("--config", required=True, help="Path to config YAML file")
@click.option(
    "--group",
    help="Optional inventory group to filter backups. Without this, prunes ALL backups.",
)
@click.option(
    "--keep-last",
    type=int,
    help="Keep only the last N successful backups (deletes older ones)",
)
@click.option(
    "--older-than",
    help="Delete snapshots older than this timestamp (format: YYYY-MM-DD HH:MM:SS)",
)
@click.option("--snapshot", help="Delete a specific snapshot by name")
@click.option("--snapshots", help="Delete multiple specific snapshots (comma-separated)")
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be deleted without actually deleting",
)
@click.option("--yes", is_flag=True, help="Skip confirmation prompt and proceed automatically")
def prune_command(config, group, keep_last, older_than, snapshot, snapshots, dry_run, yes):
    """Prune (delete) old backup snapshots from the repository.

    This command helps manage repository storage by removing old or unwanted snapshots.
    Supports multiple pruning strategies:
    - Keep only the last N backups
    - Delete backups older than a specific date
    - Delete specific snapshots by name

    Flow: load config → check health → ensure repository → query backups →
    filter snapshots to delete → confirm → execute deletion → cleanup history
    """
    try:
        # --- Argument validation (no database access yet) -------------------
        pruning_options = [keep_last, older_than, snapshot, snapshots]
        specified_options = [opt for opt in pruning_options if opt is not None]

        if not specified_options:
            logger.error(
                "Must specify one pruning option: --keep-last, --older-than, --snapshot, or --snapshots"
            )
            sys.exit(1)

        if len(specified_options) > 1:
            logger.error(
                "Pruning options are mutually exclusive. "
                "Please specify only one of: --keep-last, --older-than, --snapshot, or --snapshots"
            )
            sys.exit(1)

        if keep_last is not None and keep_last <= 0:
            logger.error("--keep-last must be a positive number (greater than 0)")
            sys.exit(1)

        # An empty string counts as "specified" above, so reject it explicitly
        # rather than letting it reach the SQL layer.
        chosen_value = specified_options[0]
        if isinstance(chosen_value, str) and not chosen_value.strip():
            logger.error("Pruning option value must not be empty")
            sys.exit(1)

        strategy, strategy_kwargs, strategy_description = _select_prune_strategy(
            keep_last, older_than, snapshot, snapshots
        )

        if strategy == "multiple" and not strategy_kwargs["snapshots"]:
            logger.error("--snapshots must list at least one snapshot name")
            sys.exit(1)

        # --- Connect and run pre-flight checks ------------------------------
        cfg = config_module.load_config(config)
        config_module.validate_config(cfg)

        database = db.StarRocksDB(
            host=cfg["host"],
            port=cfg["port"],
            user=cfg["user"],
            password=os.getenv("STARROCKS_PASSWORD"),
            database=cfg["database"],
            tls_config=cfg.get("tls"),
        )

        ops_database = config_module.get_ops_database(cfg)

        with database:
            was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
            if was_created:
                logger.warning(
                    "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
                )
                sys.exit(1)

            healthy, message = health.check_cluster_health(database)
            if not healthy:
                logger.error(f"Cluster health check failed: {message}")
                sys.exit(1)

            logger.success(f"Cluster health: {message}")

            repository.ensure_repository(database, cfg["repository"])
            logger.success(f"Repository '{cfg['repository']}' verified")

            logger.info(f"Pruning strategy: {strategy_description}")

            if group:
                logger.info(f"Filtering by inventory group: {group}")

            # --- Work out which snapshots are affected ----------------------
            all_backups = prune.get_successful_backups(
                database, cfg["repository"], group=group, ops_database=ops_database
            )

            if not all_backups:
                msg = f"No successful backups found in repository '{cfg['repository']}'"
                if group:
                    msg += f" for inventory group '{group}'"
                logger.info(msg)
                sys.exit(0)

            logger.info(f"Found {len(all_backups)} total backup(s) in repository")

            if strategy in ("specific", "multiple"):
                # Explicitly named snapshots are checked against the repository
                # before anything is deleted.
                snapshots_to_verify = (
                    [snapshot] if strategy == "specific" else strategy_kwargs["snapshots"]
                )
                for snap in snapshots_to_verify:
                    prune.verify_snapshot_exists(database, cfg["repository"], snap)

            snapshots_to_delete = prune.filter_snapshots_to_delete(
                all_backups, strategy, **strategy_kwargs
            )

            if not snapshots_to_delete:
                logger.success("No snapshots to delete based on the specified criteria")
                sys.exit(0)

            logger.info("")
            logger.info(f"Snapshots to delete: {len(snapshots_to_delete)}")
            for snap in snapshots_to_delete:
                logger.info(f"  - {snap['label']} (finished: {snap['finished_at']})")

            kept_count = len(all_backups) - len(snapshots_to_delete)
            if strategy == "keep_last":
                logger.info(f"Snapshots to keep: {kept_count} (most recent)")

            if dry_run:
                logger.info("")
                logger.warning("DRY RUN MODE - No snapshots will be deleted")
                logger.info(f"Would delete {len(snapshots_to_delete)} snapshot(s)")
                sys.exit(0)

            if not yes:
                logger.info("")
                logger.warning(
                    f"This will permanently delete {len(snapshots_to_delete)} snapshot(s) from the repository"
                )
                confirm = click.confirm("Do you want to proceed?", default=False)
                if not confirm:
                    logger.info("Prune operation cancelled by user")
                    sys.exit(1)

            # --- Delete; one failure does not stop the remaining snapshots ---
            logger.info("")
            logger.info("Starting snapshot deletion...")
            deleted_count = 0
            failed_count = 0

            for snap in snapshots_to_delete:
                try:
                    prune.execute_drop_snapshot(database, cfg["repository"], snap["label"])
                    prune.cleanup_backup_history(database, snap["label"], ops_database=ops_database)
                    deleted_count += 1
                except Exception as e:
                    logger.error(f"Failed to delete snapshot '{snap['label']}': {e}")
                    failed_count += 1

            logger.info("")
            logger.success(f"Deleted {deleted_count} snapshot(s)")

            if failed_count > 0:
                logger.warning(f"Failed to delete {failed_count} snapshot(s)")

            if strategy == "keep_last":
                logger.success(f"Kept {kept_count} most recent backup(s)")

            sys.exit(0 if failed_count == 0 else 1)

    except exceptions.ConfigFileNotFoundError as e:
        error_handler.handle_config_file_not_found_error(e)
        sys.exit(1)
    except exceptions.ConfigValidationError as e:
        error_handler.handle_config_validation_error(e, config)
        sys.exit(1)
    except FileNotFoundError as e:
        error_handler.handle_config_file_not_found_error(exceptions.ConfigFileNotFoundError(str(e)))
        sys.exit(1)
    except ValueError as e:
        logger.error(f"Validation error: {e}")
        sys.exit(1)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so
        # the deliberate exits above are not intercepted here.
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)
|
|
877
|
+
|
|
878
|
+
|
|
674
879
|
if __name__ == "__main__":
|
|
675
880
|
cli()
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Copyright 2025 deep-bi
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
|
|
17
|
+
from . import logger
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_successful_backups(
    db, repository: str, group: str = None, ops_database: str = "ops"
) -> list[dict]:
    """Return the FINISHED backups recorded for a repository, oldest first.

    Args:
        db: Database connection exposing ``query(sql)``
        repository: Repository name the backups must belong to
        group: Optional inventory group; when given, only backups whose
            partitions map to a table of that group are returned
        ops_database: Name of the ops database (defaults to "ops")

    Returns:
        One dict per backup with the keys ``label`` and ``finished_at``
        (stringified), plus ``inventory_group`` when a group filter is used.
    """
    if not group:
        sql = f"""
        SELECT
            label,
            finished_at
        FROM {ops_database}.backup_history
        WHERE repository = '{repository}'
            AND status = 'FINISHED'
        ORDER BY finished_at ASC
        """
        return [
            {"label": record[0], "finished_at": str(record[1])}
            for record in db.query(sql)
        ]

    # Group filtering resolves label -> partitions -> inventory entry; an
    # inventory row with table_name '*' matches every table of its database.
    sql = f"""
        SELECT DISTINCT
            bh.label,
            bh.finished_at,
            ti.inventory_group
        FROM {ops_database}.backup_history bh
        INNER JOIN {ops_database}.backup_partitions bp ON bh.label = bp.label
        INNER JOIN {ops_database}.table_inventory ti
            ON bp.database_name = ti.database_name
            AND (bp.table_name = ti.table_name OR ti.table_name = '*')
        WHERE bh.repository = '{repository}'
            AND bh.status = 'FINISHED'
            AND ti.inventory_group = '{group}'
        ORDER BY bh.finished_at ASC
        """
    return [
        {"label": record[0], "finished_at": str(record[1]), "inventory_group": record[2]}
        for record in db.query(sql)
    ]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _parse_finished_at(value: str) -> datetime:
    """Parse a stored ``finished_at`` string, with or without fractional seconds.

    Raises:
        ValueError: if the value matches neither accepted layout.
    """
    for layout in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"):
        try:
            return datetime.strptime(value, layout)
        except ValueError:
            continue
    raise ValueError(
        f"Invalid finished_at value '{value}'. Expected 'YYYY-MM-DD HH:MM:SS'"
    )


def filter_snapshots_to_delete(
    all_snapshots: list[dict], strategy: str, **kwargs
) -> list[dict]:
    """Filter snapshots based on pruning strategy.

    Args:
        all_snapshots: List of snapshot dicts (must be sorted by finished_at ASC);
            each dict needs a 'label' key and, for 'older_than', a 'finished_at'
            string
        strategy: Pruning strategy - 'keep_last', 'older_than', 'specific', or 'multiple'
        **kwargs: Strategy-specific parameters:
            - keep_last: 'count' (int) - number of backups to keep
            - older_than: 'timestamp' (str) - timestamp in 'YYYY-MM-DD HH:MM:SS' format
            - specific: 'snapshot' (str) - specific snapshot name
            - multiple: 'snapshots' (list) - list of snapshot names

    Returns:
        List of snapshots to delete, in the order they appear in ``all_snapshots``

    Raises:
        ValueError: if the strategy is unknown, its parameter is missing or
            invalid, or a snapshot's 'finished_at' cannot be parsed
    """
    if strategy == "keep_last":
        count = kwargs.get("count")
        if count is None or count <= 0:
            raise ValueError("keep_last strategy requires a positive count")

        # Input is oldest-first, so everything before the last `count` entries goes.
        if len(all_snapshots) <= count:
            return []
        return all_snapshots[:-count]

    if strategy == "older_than":
        timestamp_str = kwargs.get("timestamp")
        if not timestamp_str:
            raise ValueError("older_than strategy requires a timestamp")

        try:
            cutoff = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
        except ValueError as e:
            raise ValueError(
                f"Invalid timestamp format '{timestamp_str}'. Expected 'YYYY-MM-DD HH:MM:SS'"
            ) from e

        # finished_at may carry fractional seconds, so it is parsed leniently;
        # the comparison is strict: a snapshot exactly at the cutoff is kept.
        return [
            snapshot
            for snapshot in all_snapshots
            if _parse_finished_at(snapshot["finished_at"]) < cutoff
        ]

    if strategy == "specific":
        snapshot_name = kwargs.get("snapshot")
        if not snapshot_name:
            raise ValueError("specific strategy requires a snapshot name")

        # Only the first matching label is returned.
        for snapshot in all_snapshots:
            if snapshot["label"] == snapshot_name:
                return [snapshot]
        return []

    if strategy == "multiple":
        snapshot_names = kwargs.get("snapshots")
        if not snapshot_names:
            raise ValueError("multiple strategy requires a list of snapshot names")

        return [
            snapshot for snapshot in all_snapshots if snapshot["label"] in snapshot_names
        ]

    raise ValueError(f"Unknown pruning strategy: {strategy}")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def verify_snapshot_exists(db, repository: str, snapshot_name: str) -> bool:
    """Check that a snapshot is listed in the repository.

    Args:
        db: Database connection exposing ``query(sql)``
        repository: Repository name
        snapshot_name: Snapshot name to look up

    Returns:
        True when the lookup returns at least one row. The function never
        returns False; a missing snapshot is reported by raising.

    Raises:
        Exception: if the snapshot is not found or the query itself fails;
            the failure is logged before being re-raised
    """
    lookup_sql = f"SHOW SNAPSHOT ON {repository} WHERE SNAPSHOT = '{snapshot_name}'"

    try:
        matches = db.query(lookup_sql)
        if matches:
            return True
        # Raised inside the try on purpose: the handler below logs it too.
        raise Exception(f"Snapshot '{snapshot_name}' not found in repository '{repository}'")
    except Exception as exc:
        logger.error(f"Failed to verify snapshot '{snapshot_name}': {exc}")
        raise
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def execute_drop_snapshot(db, repository: str, snapshot_name: str) -> None:
    """Issue a DROP SNAPSHOT statement for one snapshot of a repository.

    Args:
        db: Database connection exposing ``execute(sql)``
        repository: Repository name
        snapshot_name: Snapshot name to delete

    Raises:
        Exception: whatever the statement (or logging) raises; it is logged
            as a deletion failure and then re-raised unchanged
    """
    drop_sql = f"DROP SNAPSHOT ON {repository} WHERE SNAPSHOT = '{snapshot_name}'"

    try:
        logger.info(f"Deleting snapshot: {snapshot_name}")
        db.execute(drop_sql)
        logger.success(f"Successfully deleted snapshot: {snapshot_name}")
    except Exception as exc:
        logger.error(f"Failed to delete snapshot '{snapshot_name}': {exc}")
        raise
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def cleanup_backup_history(db, snapshot_label: str, ops_database: str = "ops") -> None:
    """Delete the ops-database records of a snapshot after it was dropped.

    Rows are removed from ``backup_partitions`` first and ``backup_history``
    second. This is best-effort: any failure is logged as a warning and not
    propagated to the caller.

    Args:
        db: Database connection exposing ``execute(sql)``
        snapshot_label: Snapshot label whose records are removed
        ops_database: Name of the ops database (defaults to "ops")
    """
    try:
        for table_name in ("backup_partitions", "backup_history"):
            db.execute(
                f"DELETE FROM {ops_database}.{table_name} WHERE label = '{snapshot_label}'"
            )
        logger.debug(f"Cleaned up backup history for: {snapshot_label}")
    except Exception as exc:
        logger.warning(f"Failed to cleanup backup history for '{snapshot_label}': {exc}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: starrocks-br
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.7.0a1
|
|
4
4
|
Summary: StarRocks Backup and Restore automation tool
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -56,6 +56,7 @@ This tool adds **incremental backup capabilities** to StarRocks by leveraging na
|
|
|
56
56
|
- ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
|
|
57
57
|
- ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
|
|
58
58
|
- ✅ **Inventory groups** - Organize tables into groups with different backup strategies
|
|
59
|
+
- ✅ **Backup lifecycle management** - Prune old backups with flexible retention policies (keep-last, older-than, specific snapshots)
|
|
59
60
|
- ✅ **Job concurrency control** - Prevents conflicting operations
|
|
60
61
|
- ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
|
|
61
62
|
- ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
|
|
@@ -147,6 +148,15 @@ starrocks-br backup incremental --config config.yaml --group production
|
|
|
147
148
|
starrocks-br restore --config config.yaml --target-label mydb_20251118_full
|
|
148
149
|
```
|
|
149
150
|
|
|
151
|
+
**Prune old backups:**
|
|
152
|
+
```bash
|
|
153
|
+
# Keep only last 5 backups
|
|
154
|
+
starrocks-br prune --config config.yaml --keep-last 5
|
|
155
|
+
|
|
156
|
+
# Delete backups older than a date
|
|
157
|
+
starrocks-br prune --config config.yaml --older-than "2024-01-01 00:00:00"
|
|
158
|
+
```
|
|
159
|
+
|
|
150
160
|
See [Commands Reference](docs/commands.md) for all options.
|
|
151
161
|
|
|
152
162
|
## How It Works
|
|
@@ -14,6 +14,7 @@ src/starrocks_br/history.py
|
|
|
14
14
|
src/starrocks_br/labels.py
|
|
15
15
|
src/starrocks_br/logger.py
|
|
16
16
|
src/starrocks_br/planner.py
|
|
17
|
+
src/starrocks_br/prune.py
|
|
17
18
|
src/starrocks_br/repository.py
|
|
18
19
|
src/starrocks_br/restore.py
|
|
19
20
|
src/starrocks_br/schema.py
|
|
@@ -40,6 +41,8 @@ tests/test_history.py
|
|
|
40
41
|
tests/test_labels.py
|
|
41
42
|
tests/test_logger.py
|
|
42
43
|
tests/test_planner.py
|
|
44
|
+
tests/test_prune.py
|
|
45
|
+
tests/test_prune_cli.py
|
|
43
46
|
tests/test_repository_sql.py
|
|
44
47
|
tests/test_restore.py
|
|
45
48
|
tests/test_schema_setup.py
|
|
@@ -53,7 +53,7 @@ def test_init_fails_when_repository_not_found(config_file, mock_db, setup_passwo
|
|
|
53
53
|
side_effect=RuntimeError(
|
|
54
54
|
"Repository 'test_repo' not found. Please create it first using:\n"
|
|
55
55
|
" CREATE REPOSITORY test_repo WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
|
|
56
|
-
"For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/backup_restore/CREATE_REPOSITORY/"
|
|
56
|
+
"For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
|
|
57
57
|
),
|
|
58
58
|
)
|
|
59
59
|
|