starrocks-br 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. starrocks_br-0.1.0/PKG-INFO +12 -0
  2. starrocks_br-0.1.0/README.md +304 -0
  3. starrocks_br-0.1.0/pyproject.toml +44 -0
  4. starrocks_br-0.1.0/setup.cfg +4 -0
  5. starrocks_br-0.1.0/src/starrocks_br/__init__.py +1 -0
  6. starrocks_br-0.1.0/src/starrocks_br/cli.py +385 -0
  7. starrocks_br-0.1.0/src/starrocks_br/concurrency.py +177 -0
  8. starrocks_br-0.1.0/src/starrocks_br/config.py +41 -0
  9. starrocks_br-0.1.0/src/starrocks_br/db.py +88 -0
  10. starrocks_br-0.1.0/src/starrocks_br/executor.py +245 -0
  11. starrocks_br-0.1.0/src/starrocks_br/health.py +34 -0
  12. starrocks_br-0.1.0/src/starrocks_br/history.py +93 -0
  13. starrocks_br-0.1.0/src/starrocks_br/labels.py +52 -0
  14. starrocks_br-0.1.0/src/starrocks_br/logger.py +36 -0
  15. starrocks_br-0.1.0/src/starrocks_br/planner.py +280 -0
  16. starrocks_br-0.1.0/src/starrocks_br/repository.py +36 -0
  17. starrocks_br-0.1.0/src/starrocks_br/restore.py +493 -0
  18. starrocks_br-0.1.0/src/starrocks_br/schema.py +144 -0
  19. starrocks_br-0.1.0/src/starrocks_br.egg-info/PKG-INFO +12 -0
  20. starrocks_br-0.1.0/src/starrocks_br.egg-info/SOURCES.txt +35 -0
  21. starrocks_br-0.1.0/src/starrocks_br.egg-info/dependency_links.txt +1 -0
  22. starrocks_br-0.1.0/src/starrocks_br.egg-info/entry_points.txt +2 -0
  23. starrocks_br-0.1.0/src/starrocks_br.egg-info/requires.txt +8 -0
  24. starrocks_br-0.1.0/src/starrocks_br.egg-info/top_level.txt +1 -0
  25. starrocks_br-0.1.0/tests/test_cli.py +631 -0
  26. starrocks_br-0.1.0/tests/test_concurrency.py +194 -0
  27. starrocks_br-0.1.0/tests/test_config.py +78 -0
  28. starrocks_br-0.1.0/tests/test_db.py +69 -0
  29. starrocks_br-0.1.0/tests/test_executor.py +669 -0
  30. starrocks_br-0.1.0/tests/test_health_checks.py +61 -0
  31. starrocks_br-0.1.0/tests/test_history.py +61 -0
  32. starrocks_br-0.1.0/tests/test_labels.py +153 -0
  33. starrocks_br-0.1.0/tests/test_logger.py +256 -0
  34. starrocks_br-0.1.0/tests/test_planner.py +497 -0
  35. starrocks_br-0.1.0/tests/test_repository_sql.py +44 -0
  36. starrocks_br-0.1.0/tests/test_restore.py +1105 -0
  37. starrocks_br-0.1.0/tests/test_schema_setup.py +194 -0
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: starrocks-br
3
+ Version: 0.1.0
4
+ Summary: StarRocks Backup and Restore automation tool
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: click<9,>=8.1.7
7
+ Requires-Dist: PyYAML<7,>=6.0.1
8
+ Requires-Dist: mysql-connector-python<10,>=9.0.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest<9,>=8.3.2; extra == "dev"
11
+ Requires-Dist: pytest-mock<4,>=3.14.0; extra == "dev"
12
+ Requires-Dist: pytest-cov<6,>=5.0.0; extra == "dev"
@@ -0,0 +1,304 @@
1
+ # StarRocks Backup & Restore - CLI Usage Guide
2
+
3
+ ## Overview
4
+
5
+ The StarRocks Backup & Restore tool provides production-grade automation for backup and restore operations.
6
+
7
+ ## Installation
8
+
9
+ ### Option 1: Using Devbox (Recommended for Development)
10
+
11
+ Devbox provides a reproducible development environment with all required tools.
12
+
13
+ ```bash
14
+ # Install devbox (if not already installed)
15
+ curl -fsSL https://get.jetpack.io/devbox | bash
16
+
17
+ # Start devbox shell - this automatically:
18
+ # - Installs Python 3.11 and dependencies
19
+ # - Creates a virtual environment (.venv)
20
+ # - Installs the package in editable mode
21
+ # - Installs development dependencies
22
+ devbox shell
23
+
24
+ # Once inside the devbox shell, you're ready to go:
25
+ starrocks-br --help
26
+ pytest
27
+ ```
28
+
29
+ ### Option 2: Manual Setup
30
+
31
+ ```bash
32
+ # Activate the virtual environment (first time only, create it beforehand with: python -m venv .venv)
33
+ source .venv/bin/activate
34
+
35
+ # If the package is not installed yet, run: pip install -e ".[dev]" -- the CLI is then available as: starrocks-br
36
+ ```
37
+
38
+ ## Configuration
39
+
40
+ Create a `config.yaml` file with your StarRocks connection details:
41
+
42
+ ```yaml
43
+ host: "127.0.0.1"
44
+ port: 9030
45
+ user: "root"
46
+ database: "your_database"
47
+ repository: "your_repo_name"
48
+ ```
49
+
50
+ **Password Management**
51
+
52
+ The database password must be provided via the `STARROCKS_PASSWORD` environment variable. This is a security measure to prevent storing credentials in configuration files.
53
+
54
+ ```bash
55
+ export STARROCKS_PASSWORD="your_password"
56
+ ```
57
+
58
+ **Note:** The repository must be created in StarRocks using the `CREATE REPOSITORY` command before running backups. For example:
59
+
60
+ ```sql
61
+ CREATE REPOSITORY `your_repo_name`
62
+ WITH S3
63
+ ON LOCATION "s3://your-backup-bucket/backups/"
64
+ PROPERTIES (
65
+ "aws.s3.access_key" = "your-access-key",
66
+ "aws.s3.secret_key" = "your-secret-key",
67
+ "aws.s3.endpoint" = "https://s3.amazonaws.com"
68
+ );
69
+ ```
70
+
71
+ ## Commands
72
+
73
+ ### Initialize Schema
74
+
75
+ Before running backups, initialize the ops database and control tables:
76
+
77
+ ```bash
78
+ starrocks-br init --config config.yaml
79
+ ```
80
+
81
+ **What it does:**
82
+ - Creates `ops` database
83
+ - Creates `ops.table_inventory`: Inventory groups mapping to databases/tables
84
+ - Creates `ops.backup_history`: Backup operation history
85
+ - Creates `ops.restore_history`: Restore operation history
86
+ - Creates `ops.run_status`: Job concurrency control
87
+ - Creates `ops.backup_partitions`: Partition manifest for each backup (enables intelligent restore)
88
+
89
+ **Next step:** Populate `ops.table_inventory` with your backup groups. For example:
90
+ ```sql
91
+ INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
92
+ VALUES
93
+ ('daily_facts', 'your_db', 'fact_sales'),
94
+ ('weekly_dims', 'your_db', 'dim_users'),
95
+ ('weekly_dims', 'your_db', 'dim_products'),
96
+ ('full_db_backup', 'your_db', '*'); -- Wildcard for all tables
97
+ ```
98
+
99
+ **Note:** If you skip `starrocks-br init`, the ops schema will be auto-created on your first backup/restore operation (with a warning). You still need to populate `ops.table_inventory` yourself.
100
+
101
+ ### Backup Commands
102
+
103
+ Backups are managed through "inventory groups" defined in `ops.table_inventory`. This provides a flexible way to schedule different backup strategies for different sets of tables.
104
+
105
+ #### 1. Full Backup
106
+
107
+ Runs a full backup for all tables within a specified inventory group.
108
+
109
+ ```bash
110
+ starrocks-br backup full --config config.yaml --group <group_name>
111
+ ```
112
+
113
+ **Parameters:**
114
+ - `--group`: The inventory group to back up.
115
+
116
+ **Flow:**
117
+ 1. Load config → verify cluster health → ensure repository exists
118
+ 2. Reserve job slot (prevent concurrent backups)
119
+ 3. Query `ops.table_inventory` for all tables in the specified group.
120
+ 4. Generate a unique backup label.
121
+ 5. Build and execute the `BACKUP` command for the resolved tables.
122
+ 6. Poll `SHOW BACKUP` until completion and log results.
123
+
124
+ #### 2. Incremental Backup
125
+
126
+ Backs up only the partitions that have changed since the last successful full backup for a given inventory group.
127
+
128
+ ```bash
129
+ starrocks-br backup incremental --config config.yaml --group <group_name>
130
+ ```
131
+
132
+ **Parameters:**
133
+ - `--group`: The inventory group to back up.
134
+ - `--baseline-backup` (Optional): Specify a backup label to use as the baseline instead of the latest full backup.
135
+
136
+ **Flow:**
137
+ 1. Load config → verify cluster health → ensure repository exists
138
+ 2. Reserve job slot
139
+ 3. Find the latest successful full backup for the group to use as a baseline (or use the label given via `--baseline-backup`).
140
+ 4. Find partitions that have changed since the baseline, using `information_schema.partitions`, for tables in the group.
141
+ 5. Generate a unique backup label.
142
+ 6. Build and execute the `BACKUP` command for the changed partitions.
143
+ 7. Poll `SHOW BACKUP` until completion and log results.
144
+
145
+ ### Restore Commands
146
+
147
+ #### Intelligent Point-in-Time Restore
148
+
149
+ Restores data to a specific point in time using intelligent backup chain resolution. This command automatically determines the correct sequence of backups needed for restore.
150
+
151
+ ```bash
152
+ starrocks-br restore \
153
+ --config config.yaml \
154
+ --target-label my_db_20251016_inc \
155
+ --group daily_facts \
156
+ --rename-suffix _restored
157
+ ```
158
+
159
+ **Parameters:**
160
+ - `--config`: Path to config YAML file (required)
161
+ - `--target-label`: Backup label to restore to (required)
162
+ - `--group`: Optional inventory group to filter tables to restore
163
+ - `--rename-suffix`: Suffix for temporary tables during restore (default: `_restored`)
164
+
165
+ **How it works:**
166
+ - **For full backups**: Restores directly from the target backup
167
+ - **For incremental backups**: Automatically restores the base full backup first, then applies the incremental
168
+ - **Safety mechanism**: Uses temporary tables with the specified suffix, then performs atomic rename to make restored data live
169
+
170
+ **Two Restore Modes:**
171
+ - **Disaster Recovery**: Restore all tables from a backup (omit `--group` parameter)
172
+ - **Surgical Restore**: Restore only specific table groups (use `--group` parameter)
173
+
174
+ **Purpose of `--rename-suffix`:**
175
+ The restore process creates temporary tables with the specified suffix (e.g., `table_restored`) to avoid conflicts with existing tables. Once the restore is complete and verified, the tool performs atomic renames to swap the original tables with the restored data. This ensures data safety and allows for rollback if needed.
176
+
177
+ **Flow:**
178
+ 1. Load config → verify cluster health → ensure repository exists
179
+ 2. Find the correct restore sequence (full backup + optional incremental)
180
+ 3. Get tables from backup manifest (optionally filtered by group)
181
+ 4. Execute restore flow with atomic renames
182
+ 5. Log to `ops.restore_history`
183
+
184
+ ## Example Usage Scenarios
185
+
186
+ ### Initial Setup
187
+
188
+ ```bash
189
+ # 1. Initialize ops schema (run once)
190
+ starrocks-br init --config config.yaml
191
+
192
+ # 2. Populate table inventory with your groups (the statement below is SQL: run it in a StarRocks SQL client, not in the shell)
193
+ INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
194
+ VALUES
195
+ ('daily_incrementals', 'sales_db', 'fact_orders'),
196
+ ('weekly_full', 'sales_db', 'dim_customers'),
197
+ ('weekly_full', 'sales_db', 'dim_products');
198
+ ```
199
+
200
+ ### Daily Incremental Backup (Mon-Sat)
201
+
202
+ ```bash
203
+ # Run via cron at 01:00 (STARROCKS_PASSWORD must be set in the cron job's environment)
204
+ 0 1 * * 1-6 cd /path/to/starrocks-br && . .venv/bin/activate && starrocks-br backup incremental --config config.yaml --group daily_incrementals
205
+ ```
206
+
207
+ ### Weekly Full Backup (Sunday)
208
+
209
+ ```bash
210
+ # Run via cron at 01:00 on Sundays
211
+ 0 1 * * 0 cd /path/to/starrocks-br && . .venv/bin/activate && starrocks-br backup full --config config.yaml --group weekly_full
212
+ ```
213
+
214
+ ### Disaster Recovery - Point-in-Time Restore
215
+
216
+ ```bash
217
+ # Restore to a specific backup point (automatically handles full + incremental chain)
218
+ starrocks-br restore \
219
+ --config config.yaml \
220
+ --target-label sales_db_20251015_inc \
221
+ --group daily_incrementals
222
+
223
+ # Restore all tables from a full backup
224
+ starrocks-br restore \
225
+ --config config.yaml \
226
+ --target-label sales_db_20251014_full
227
+ ```
228
+
229
+ ## Error Handling
230
+
231
+ The CLI automatically handles:
232
+
233
+ - **Job slot conflicts**: Prevents overlapping backups/restores via `ops.run_status`
234
+ - **Label collisions**: Automatically appends `_r#` suffix if label exists
235
+ - **Cluster health**: Verifies FE/BE status before starting operations
236
+ - **Repository validation**: Ensures repository exists and is accessible
237
+ - **Graceful failures**: All errors are logged to history tables with proper status
238
+
239
+ ## Monitoring
240
+
241
+ All operations are logged to:
242
+ - `ops.backup_history`: Tracks all backup attempts with status, timestamps, and error messages
243
+ - `ops.restore_history`: Tracks all restore operations with verification checksums
244
+ - `ops.run_status`: Tracks active jobs to prevent conflicts
245
+
246
+ Query examples:
247
+
248
+ ```sql
249
+ -- Check recent backup status
250
+ SELECT label, backup_type, status, started_at, finished_at
251
+ FROM ops.backup_history
252
+ ORDER BY started_at DESC
253
+ LIMIT 10;
254
+
255
+ -- Check for failed backups
256
+ SELECT label, backup_type, error_message, started_at
257
+ FROM ops.backup_history
258
+ WHERE status = 'FAILED'
259
+ ORDER BY started_at DESC;
260
+
261
+ -- Check active jobs
262
+ SELECT scope, label, state, started_at
263
+ FROM ops.run_status
264
+ WHERE state = 'ACTIVE';
265
+ ```
266
+
267
+ ## Testing
268
+
269
+ The project includes comprehensive tests (150+ tests, 90%+ coverage).
270
+
271
+ ```bash
272
+ # Run all tests
273
+ pytest
274
+
275
+ # Run with coverage report
276
+ pytest --cov=src/starrocks_br --cov-report=term-missing
277
+
278
+ # Run specific test file
279
+ pytest tests/test_cli.py -v
280
+ ```
281
+
282
+ ## Project Status
283
+
284
+ ✅ **Completed:**
285
+ - Config loader & validation
286
+ - Database connection wrapper
287
+ - StarRocks repository management
288
+ - Cluster health checks
289
+ - Job slot reservation (concurrency control)
290
+ - Label generation with collision handling
291
+ - Group-based backup planners for full and incremental backups
292
+ - Schema initialization (ops tables) with auto-creation
293
+ - Backup & restore history logging
294
+ - Backup executor with polling
295
+ - Intelligent point-in-time restore with automatic backup chain resolution
296
+ - Partition metadata tracking for backup manifests
297
+ - Atomic table rename for safe restore operations
298
+ - CLI commands (init, backup full, backup incremental, restore)
299
+
300
+ 📋 **Optional (deferred):**
301
+ - Exponential backoff retry for job conflicts
302
+ - Disk space precheck (requires external monitoring)
303
+ - Formal runbooks and DR drill procedures
304
+ - Monitoring dashboards and alerting integration
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "starrocks-br"
7
+ version = "0.1.0"
8
+ description = "StarRocks Backup and Restore automation tool"
9
+ requires-python = ">=3.9"
10
+ dependencies = [
11
+ "click>=8.1.7,<9",
12
+ "PyYAML>=6.0.1,<7",
13
+ "mysql-connector-python>=9.0.0,<10",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = [
18
+ "pytest>=8.3.2,<9",
19
+ "pytest-mock>=3.14.0,<4",
20
+ "pytest-cov>=5.0.0,<6",
21
+ ]
22
+
23
+ [project.scripts]
24
+ starrocks-br = "starrocks_br.cli:cli"
25
+
26
+ [tool.setuptools.packages.find]
27
+ where = ["src"]
28
+
29
+ [tool.pytest.ini_options]
30
+ pythonpath = ["src"]
31
+ addopts = "-q --cov=src/starrocks_br --cov-report=term-missing"
32
+ testpaths = ["tests"]
33
+
34
+ [tool.coverage.run]
35
+ branch = true
36
+ source = ["src/starrocks_br"]
37
+
38
+ [tool.coverage.report]
39
+ omit = ["*/__init__.py"]
40
+ show_missing = true
41
+ skip_covered = false
42
+
43
+ [tool.black]
44
+ line-length = 100
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ __all__ = ["cli", "config"]