starrocks-br 0.6.0__tar.gz → 0.7.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/PKG-INFO +11 -1
  2. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/README.md +10 -0
  3. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/pyproject.toml +1 -1
  4. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/cli.py +205 -0
  5. starrocks_br-0.7.0a1/src/starrocks_br/prune.py +208 -0
  6. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/PKG-INFO +11 -1
  7. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/SOURCES.txt +3 -0
  8. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_init.py +1 -1
  9. starrocks_br-0.7.0a1/tests/test_prune.py +380 -0
  10. starrocks_br-0.7.0a1/tests/test_prune_cli.py +742 -0
  11. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/LICENSE +0 -0
  12. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/setup.cfg +0 -0
  13. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/__init__.py +0 -0
  14. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/concurrency.py +0 -0
  15. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/config.py +0 -0
  16. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/db.py +0 -0
  17. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/error_handler.py +0 -0
  18. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/exceptions.py +0 -0
  19. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/executor.py +0 -0
  20. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/health.py +0 -0
  21. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/history.py +0 -0
  22. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/labels.py +0 -0
  23. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/logger.py +0 -0
  24. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/planner.py +0 -0
  25. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/repository.py +0 -0
  26. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/restore.py +0 -0
  27. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/schema.py +0 -0
  28. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/timezone.py +0 -0
  29. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br/utils.py +0 -0
  30. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/dependency_links.txt +0 -0
  31. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/entry_points.txt +0 -0
  32. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/requires.txt +0 -0
  33. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/src/starrocks_br.egg-info/top_level.txt +0 -0
  34. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_backup.py +0 -0
  35. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_exceptions.py +0 -0
  36. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_general.py +0 -0
  37. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_cli_restore.py +0 -0
  38. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_concurrency.py +0 -0
  39. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_config.py +0 -0
  40. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_db.py +0 -0
  41. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_error_handler.py +0 -0
  42. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_executor.py +0 -0
  43. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_health_checks.py +0 -0
  44. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_history.py +0 -0
  45. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_labels.py +0 -0
  46. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_logger.py +0 -0
  47. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_planner.py +0 -0
  48. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_repository_sql.py +0 -0
  49. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_restore.py +0 -0
  50. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_schema_setup.py +0 -0
  51. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_timezone.py +0 -0
  52. {starrocks_br-0.6.0 → starrocks_br-0.7.0a1}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: starrocks-br
3
- Version: 0.6.0
3
+ Version: 0.7.0a1
4
4
  Summary: StarRocks Backup and Restore automation tool
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -56,6 +56,7 @@ This tool adds **incremental backup capabilities** to StarRocks by leveraging na
56
56
  - ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
57
57
  - ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
58
58
  - ✅ **Inventory groups** - Organize tables into groups with different backup strategies
59
+ - ✅ **Backup lifecycle management** - Prune old backups with flexible retention policies (keep-last, older-than, specific snapshots)
59
60
  - ✅ **Job concurrency control** - Prevents conflicting operations
60
61
  - ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
61
62
  - ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
@@ -147,6 +148,15 @@ starrocks-br backup incremental --config config.yaml --group production
147
148
  starrocks-br restore --config config.yaml --target-label mydb_20251118_full
148
149
  ```
149
150
 
151
+ **Prune old backups:**
152
+ ```bash
153
+ # Keep only last 5 backups
154
+ starrocks-br prune --config config.yaml --keep-last 5
155
+
156
+ # Delete backups older than a date
157
+ starrocks-br prune --config config.yaml --older-than "2024-01-01 00:00:00"
158
+ ```
159
+
150
160
  See [Commands Reference](docs/commands.md) for all options.
151
161
 
152
162
  ## How It Works
@@ -38,6 +38,7 @@ This tool adds **incremental backup capabilities** to StarRocks by leveraging na
38
38
  - ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
39
39
  - ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
40
40
  - ✅ **Inventory groups** - Organize tables into groups with different backup strategies
41
+ - ✅ **Backup lifecycle management** - Prune old backups with flexible retention policies (keep-last, older-than, specific snapshots)
41
42
  - ✅ **Job concurrency control** - Prevents conflicting operations
42
43
  - ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
43
44
  - ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
@@ -129,6 +130,15 @@ starrocks-br backup incremental --config config.yaml --group production
129
130
  starrocks-br restore --config config.yaml --target-label mydb_20251118_full
130
131
  ```
131
132
 
133
+ **Prune old backups:**
134
+ ```bash
135
+ # Keep only last 5 backups
136
+ starrocks-br prune --config config.yaml --keep-last 5
137
+
138
+ # Delete backups older than a date
139
+ starrocks-br prune --config config.yaml --older-than "2024-01-01 00:00:00"
140
+ ```
141
+
132
142
  See [Commands Reference](docs/commands.md) for all options.
133
143
 
134
144
  ## How It Works
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "starrocks-br"
7
- version = "0.6.0"
7
+ version = "0.7.0a1"
8
8
  description = "StarRocks Backup and Restore automation tool"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -27,6 +27,7 @@ from . import (
27
27
  labels,
28
28
  logger,
29
29
  planner,
30
+ prune,
30
31
  repository,
31
32
  restore,
32
33
  schema,
@@ -671,5 +672,209 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
671
672
  sys.exit(1)
672
673
 
673
674
 
675
+ @cli.command("prune")
676
+ @click.option("--config", required=True, help="Path to config YAML file")
677
+ @click.option(
678
+ "--group",
679
+ help="Optional inventory group to filter backups. Without this, prunes ALL backups.",
680
+ )
681
+ @click.option(
682
+ "--keep-last",
683
+ type=int,
684
+ help="Keep only the last N successful backups (deletes older ones)",
685
+ )
686
+ @click.option(
687
+ "--older-than",
688
+ help="Delete snapshots older than this timestamp (format: YYYY-MM-DD HH:MM:SS)",
689
+ )
690
+ @click.option("--snapshot", help="Delete a specific snapshot by name")
691
+ @click.option("--snapshots", help="Delete multiple specific snapshots (comma-separated)")
692
+ @click.option(
693
+ "--dry-run",
694
+ is_flag=True,
695
+ help="Show what would be deleted without actually deleting",
696
+ )
697
+ @click.option("--yes", is_flag=True, help="Skip confirmation prompt and proceed automatically")
698
+ def prune_command(config, group, keep_last, older_than, snapshot, snapshots, dry_run, yes):
699
+ """Prune (delete) old backup snapshots from the repository.
700
+
701
+ This command helps manage repository storage by removing old or unwanted snapshots.
702
+ Supports multiple pruning strategies:
703
+ - Keep only the last N backups
704
+ - Delete backups older than a specific date
705
+ - Delete specific snapshots by name
706
+
707
+ Flow: load config → check health → ensure repository → query backups →
708
+ filter snapshots to delete → confirm → execute deletion → cleanup history
709
+ """
710
+ try:
711
+ pruning_options = [keep_last, older_than, snapshot, snapshots]
712
+ specified_options = [opt for opt in pruning_options if opt is not None]
713
+
714
+ if not specified_options:
715
+ logger.error(
716
+ "Must specify one pruning option: --keep-last, --older-than, --snapshot, or --snapshots"
717
+ )
718
+ sys.exit(1)
719
+
720
+ if len(specified_options) > 1:
721
+ logger.error(
722
+ "Pruning options are mutually exclusive. "
723
+ "Please specify only one of: --keep-last, --older-than, --snapshot, or --snapshots"
724
+ )
725
+ sys.exit(1)
726
+
727
+ if keep_last is not None and keep_last <= 0:
728
+ logger.error("--keep-last must be a positive number (greater than 0)")
729
+ sys.exit(1)
730
+
731
+ cfg = config_module.load_config(config)
732
+ config_module.validate_config(cfg)
733
+
734
+ database = db.StarRocksDB(
735
+ host=cfg["host"],
736
+ port=cfg["port"],
737
+ user=cfg["user"],
738
+ password=os.getenv("STARROCKS_PASSWORD"),
739
+ database=cfg["database"],
740
+ tls_config=cfg.get("tls"),
741
+ )
742
+
743
+ ops_database = config_module.get_ops_database(cfg)
744
+
745
+ with database:
746
+ was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
747
+ if was_created:
748
+ logger.warning(
749
+ "ops schema was auto-created. Please run 'starrocks-br init' after populating config."
750
+ )
751
+ sys.exit(1)
752
+
753
+ healthy, message = health.check_cluster_health(database)
754
+ if not healthy:
755
+ logger.error(f"Cluster health check failed: {message}")
756
+ sys.exit(1)
757
+
758
+ logger.success(f"Cluster health: {message}")
759
+
760
+ repository.ensure_repository(database, cfg["repository"])
761
+ logger.success(f"Repository '{cfg['repository']}' verified")
762
+
763
+ if keep_last:
764
+ strategy = "keep_last"
765
+ strategy_kwargs = {"count": keep_last}
766
+ logger.info(f"Pruning strategy: Keep last {keep_last} backup(s)")
767
+ elif older_than:
768
+ strategy = "older_than"
769
+ strategy_kwargs = {"timestamp": older_than}
770
+ logger.info(f"Pruning strategy: Delete backups older than {older_than}")
771
+ elif snapshot:
772
+ strategy = "specific"
773
+ strategy_kwargs = {"snapshot": snapshot}
774
+ logger.info(f"Pruning strategy: Delete specific snapshot '{snapshot}'")
775
+ elif snapshots:
776
+ strategy = "multiple"
777
+ snapshot_list = [s.strip() for s in snapshots.split(",")]
778
+ strategy_kwargs = {"snapshots": snapshot_list}
779
+ logger.info(f"Pruning strategy: Delete {len(snapshot_list)} specific snapshot(s)")
780
+
781
+ if group:
782
+ logger.info(f"Filtering by inventory group: {group}")
783
+
784
+ all_backups = prune.get_successful_backups(
785
+ database, cfg["repository"], group=group, ops_database=ops_database
786
+ )
787
+
788
+ if not all_backups:
789
+ msg = f"No successful backups found in repository '{cfg['repository']}'"
790
+ if group:
791
+ msg += f" for inventory group '{group}'"
792
+ logger.info(msg)
793
+ sys.exit(0)
794
+
795
+ logger.info(f"Found {len(all_backups)} total backup(s) in repository")
796
+
797
+ if strategy in ["specific", "multiple"]:
798
+ snapshots_to_verify = (
799
+ [snapshot] if strategy == "specific" else strategy_kwargs["snapshots"]
800
+ )
801
+ for snap in snapshots_to_verify:
802
+ prune.verify_snapshot_exists(database, cfg["repository"], snap)
803
+
804
+ snapshots_to_delete = prune.filter_snapshots_to_delete(
805
+ all_backups, strategy, **strategy_kwargs
806
+ )
807
+
808
+ if not snapshots_to_delete:
809
+ logger.success("No snapshots to delete based on the specified criteria")
810
+ sys.exit(0)
811
+
812
+ logger.info("")
813
+ logger.info(f"Snapshots to delete: {len(snapshots_to_delete)}")
814
+ for snap in snapshots_to_delete:
815
+ logger.info(f" - {snap['label']} (finished: {snap['finished_at']})")
816
+
817
+ if keep_last:
818
+ kept_count = len(all_backups) - len(snapshots_to_delete)
819
+ logger.info(f"Snapshots to keep: {kept_count} (most recent)")
820
+
821
+ if dry_run:
822
+ logger.info("")
823
+ logger.warning("DRY RUN MODE - No snapshots will be deleted")
824
+ logger.info(f"Would delete {len(snapshots_to_delete)} snapshot(s)")
825
+ sys.exit(0)
826
+
827
+ if not yes:
828
+ logger.info("")
829
+ logger.warning(
830
+ f"This will permanently delete {len(snapshots_to_delete)} snapshot(s) from the repository"
831
+ )
832
+ confirm = click.confirm("Do you want to proceed?", default=False)
833
+ if not confirm:
834
+ logger.info("Prune operation cancelled by user")
835
+ sys.exit(1)
836
+
837
+ logger.info("")
838
+ logger.info("Starting snapshot deletion...")
839
+ deleted_count = 0
840
+ failed_count = 0
841
+
842
+ for snap in snapshots_to_delete:
843
+ try:
844
+ prune.execute_drop_snapshot(database, cfg["repository"], snap["label"])
845
+ prune.cleanup_backup_history(database, snap["label"], ops_database=ops_database)
846
+ deleted_count += 1
847
+ except Exception as e:
848
+ logger.error(f"Failed to delete snapshot '{snap['label']}': {e}")
849
+ failed_count += 1
850
+
851
+ logger.info("")
852
+ logger.success(f"Deleted {deleted_count} snapshot(s)")
853
+
854
+ if failed_count > 0:
855
+ logger.warning(f"Failed to delete {failed_count} snapshot(s)")
856
+
857
+ if keep_last:
858
+ logger.success(f"Kept {len(all_backups) - len(snapshots_to_delete)} most recent backup(s)")
859
+
860
+ sys.exit(0 if failed_count == 0 else 1)
861
+
862
+ except exceptions.ConfigFileNotFoundError as e:
863
+ error_handler.handle_config_file_not_found_error(e)
864
+ sys.exit(1)
865
+ except exceptions.ConfigValidationError as e:
866
+ error_handler.handle_config_validation_error(e, config)
867
+ sys.exit(1)
868
+ except FileNotFoundError as e:
869
+ error_handler.handle_config_file_not_found_error(exceptions.ConfigFileNotFoundError(str(e)))
870
+ sys.exit(1)
871
+ except ValueError as e:
872
+ logger.error(f"Validation error: {e}")
873
+ sys.exit(1)
874
+ except Exception as e:
875
+ logger.error(f"Unexpected error: {e}")
876
+ sys.exit(1)
877
+
878
+
674
879
  if __name__ == "__main__":
675
880
  cli()
@@ -0,0 +1,208 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from datetime import datetime
16
+
17
+ from . import logger
18
+
19
+
20
+ def get_successful_backups(
21
+ db, repository: str, group: str = None, ops_database: str = "ops"
22
+ ) -> list[dict]:
23
+ """Get all successful backups from backup_history, optionally filtered by group.
24
+
25
+ Args:
26
+ db: Database connection
27
+ repository: Repository name to filter by
28
+ group: Optional inventory group to filter by
29
+ ops_database: Name of the ops database (defaults to "ops")
30
+
31
+ Returns:
32
+ List of backup records as dicts with keys: label, finished_at, inventory_group (if group filtering is used)
33
+ """
34
+ if group:
35
+ sql = f"""
36
+ SELECT DISTINCT
37
+ bh.label,
38
+ bh.finished_at,
39
+ ti.inventory_group
40
+ FROM {ops_database}.backup_history bh
41
+ INNER JOIN {ops_database}.backup_partitions bp ON bh.label = bp.label
42
+ INNER JOIN {ops_database}.table_inventory ti
43
+ ON bp.database_name = ti.database_name
44
+ AND (bp.table_name = ti.table_name OR ti.table_name = '*')
45
+ WHERE bh.repository = '{repository}'
46
+ AND bh.status = 'FINISHED'
47
+ AND ti.inventory_group = '{group}'
48
+ ORDER BY bh.finished_at ASC
49
+ """
50
+ else:
51
+ sql = f"""
52
+ SELECT
53
+ label,
54
+ finished_at
55
+ FROM {ops_database}.backup_history
56
+ WHERE repository = '{repository}'
57
+ AND status = 'FINISHED'
58
+ ORDER BY finished_at ASC
59
+ """
60
+
61
+ rows = db.query(sql)
62
+ results = []
63
+
64
+ for row in rows:
65
+ if group:
66
+ results.append({"label": row[0], "finished_at": str(row[1]), "inventory_group": row[2]})
67
+ else:
68
+ results.append({"label": row[0], "finished_at": str(row[1])})
69
+
70
+ return results
71
+
72
+
73
+ def filter_snapshots_to_delete(
74
+ all_snapshots: list[dict], strategy: str, **kwargs
75
+ ) -> list[dict]:
76
+ """Filter snapshots based on pruning strategy.
77
+
78
+ Args:
79
+ all_snapshots: List of snapshot dicts (must be sorted by finished_at ASC)
80
+ strategy: Pruning strategy - 'keep_last', 'older_than', 'specific', or 'multiple'
81
+ **kwargs: Strategy-specific parameters:
82
+ - keep_last: 'count' (int) - number of backups to keep
83
+ - older_than: 'timestamp' (str) - timestamp in 'YYYY-MM-DD HH:MM:SS' format
84
+ - specific: 'snapshot' (str) - specific snapshot name
85
+ - multiple: 'snapshots' (list) - list of snapshot names
86
+
87
+ Returns:
88
+ List of snapshots to delete
89
+ """
90
+ if strategy == "keep_last":
91
+ count = kwargs.get("count")
92
+ if count is None or count <= 0:
93
+ raise ValueError("keep_last strategy requires a positive count")
94
+
95
+ # Keep the last N, delete the rest
96
+ if len(all_snapshots) <= count:
97
+ return []
98
+ return all_snapshots[: -count] # Delete all except last N
99
+
100
+ elif strategy == "older_than":
101
+ timestamp_str = kwargs.get("timestamp")
102
+ if not timestamp_str:
103
+ raise ValueError("older_than strategy requires a timestamp")
104
+
105
+ try:
106
+ cutoff = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
107
+ except ValueError as e:
108
+ raise ValueError(
109
+ f"Invalid timestamp format '{timestamp_str}'. Expected 'YYYY-MM-DD HH:MM:SS'"
110
+ ) from e
111
+
112
+ to_delete = []
113
+ for snapshot in all_snapshots:
114
+ snapshot_time = datetime.strptime(snapshot["finished_at"], "%Y-%m-%d %H:%M:%S")
115
+ if snapshot_time < cutoff:
116
+ to_delete.append(snapshot)
117
+
118
+ return to_delete
119
+
120
+ elif strategy == "specific":
121
+ snapshot_name = kwargs.get("snapshot")
122
+ if not snapshot_name:
123
+ raise ValueError("specific strategy requires a snapshot name")
124
+
125
+ for snapshot in all_snapshots:
126
+ if snapshot["label"] == snapshot_name:
127
+ return [snapshot]
128
+
129
+ return []
130
+
131
+ elif strategy == "multiple":
132
+ snapshot_names = kwargs.get("snapshots")
133
+ if not snapshot_names:
134
+ raise ValueError("multiple strategy requires a list of snapshot names")
135
+
136
+ to_delete = []
137
+ for snapshot in all_snapshots:
138
+ if snapshot["label"] in snapshot_names:
139
+ to_delete.append(snapshot)
140
+
141
+ return to_delete
142
+
143
+ else:
144
+ raise ValueError(f"Unknown pruning strategy: {strategy}")
145
+
146
+
147
+ def verify_snapshot_exists(db, repository: str, snapshot_name: str) -> bool:
148
+ """Verify that a snapshot exists in the repository.
149
+
150
+ Args:
151
+ db: Database connection
152
+ repository: Repository name
153
+ snapshot_name: Snapshot name to verify
154
+
155
+ Returns:
156
+ True if snapshot exists (never returns False; a missing snapshot raises instead)
157
+
158
+ Raises:
159
+ Exception if snapshot is not found
160
+ """
161
+ sql = f"SHOW SNAPSHOT ON {repository} WHERE SNAPSHOT = '{snapshot_name}'"
162
+
163
+ try:
164
+ rows = db.query(sql)
165
+ if not rows:
166
+ raise Exception(f"Snapshot '{snapshot_name}' not found in repository '{repository}'")
167
+ return True
168
+ except Exception as e:
169
+ logger.error(f"Failed to verify snapshot '{snapshot_name}': {e}")
170
+ raise
171
+
172
+
173
+ def execute_drop_snapshot(db, repository: str, snapshot_name: str) -> None:
174
+ """Execute DROP SNAPSHOT command for a single snapshot.
175
+
176
+ Args:
177
+ db: Database connection
178
+ repository: Repository name
179
+ snapshot_name: Snapshot name to delete
180
+
181
+ Raises:
182
+ Exception if deletion fails
183
+ """
184
+ sql = f"DROP SNAPSHOT ON {repository} WHERE SNAPSHOT = '{snapshot_name}'"
185
+
186
+ try:
187
+ logger.info(f"Deleting snapshot: {snapshot_name}")
188
+ db.execute(sql)
189
+ logger.success(f"Successfully deleted snapshot: {snapshot_name}")
190
+ except Exception as e:
191
+ logger.error(f"Failed to delete snapshot '{snapshot_name}': {e}")
192
+ raise
193
+
194
+
195
+ def cleanup_backup_history(db, snapshot_label: str, ops_database: str = "ops") -> None:
196
+ """Remove backup history entry after snapshot deletion.
197
+
198
+ Args:
199
+ db: Database connection
200
+ snapshot_label: Snapshot label to remove from history
201
+ ops_database: Name of the ops database (defaults to "ops")
202
+ """
203
+ try:
204
+ db.execute(f"DELETE FROM {ops_database}.backup_partitions WHERE label = '{snapshot_label}'")
205
+ db.execute(f"DELETE FROM {ops_database}.backup_history WHERE label = '{snapshot_label}'")
206
+ logger.debug(f"Cleaned up backup history for: {snapshot_label}")
207
+ except Exception as e:
208
+ logger.warning(f"Failed to cleanup backup history for '{snapshot_label}': {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: starrocks-br
3
- Version: 0.6.0
3
+ Version: 0.7.0a1
4
4
  Summary: StarRocks Backup and Restore automation tool
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -56,6 +56,7 @@ This tool adds **incremental backup capabilities** to StarRocks by leveraging na
56
56
  - ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
57
57
  - ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
58
58
  - ✅ **Inventory groups** - Organize tables into groups with different backup strategies
59
+ - ✅ **Backup lifecycle management** - Prune old backups with flexible retention policies (keep-last, older-than, specific snapshots)
59
60
  - ✅ **Job concurrency control** - Prevents conflicting operations
60
61
  - ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
61
62
  - ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
@@ -147,6 +148,15 @@ starrocks-br backup incremental --config config.yaml --group production
147
148
  starrocks-br restore --config config.yaml --target-label mydb_20251118_full
148
149
  ```
149
150
 
151
+ **Prune old backups:**
152
+ ```bash
153
+ # Keep only last 5 backups
154
+ starrocks-br prune --config config.yaml --keep-last 5
155
+
156
+ # Delete backups older than a date
157
+ starrocks-br prune --config config.yaml --older-than "2024-01-01 00:00:00"
158
+ ```
159
+
150
160
  See [Commands Reference](docs/commands.md) for all options.
151
161
 
152
162
  ## How It Works
@@ -14,6 +14,7 @@ src/starrocks_br/history.py
14
14
  src/starrocks_br/labels.py
15
15
  src/starrocks_br/logger.py
16
16
  src/starrocks_br/planner.py
17
+ src/starrocks_br/prune.py
17
18
  src/starrocks_br/repository.py
18
19
  src/starrocks_br/restore.py
19
20
  src/starrocks_br/schema.py
@@ -40,6 +41,8 @@ tests/test_history.py
40
41
  tests/test_labels.py
41
42
  tests/test_logger.py
42
43
  tests/test_planner.py
44
+ tests/test_prune.py
45
+ tests/test_prune_cli.py
43
46
  tests/test_repository_sql.py
44
47
  tests/test_restore.py
45
48
  tests/test_schema_setup.py
@@ -53,7 +53,7 @@ def test_init_fails_when_repository_not_found(config_file, mock_db, setup_passwo
53
53
  side_effect=RuntimeError(
54
54
  "Repository 'test_repo' not found. Please create it first using:\n"
55
55
  " CREATE REPOSITORY test_repo WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
56
- "For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/backup_restore/CREATE_REPOSITORY/"
56
+ "For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
57
57
  ),
58
58
  )
59
59