starrocks-br 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br-0.1.0/PKG-INFO +12 -0
- starrocks_br-0.1.0/README.md +304 -0
- starrocks_br-0.1.0/pyproject.toml +44 -0
- starrocks_br-0.1.0/setup.cfg +4 -0
- starrocks_br-0.1.0/src/starrocks_br/__init__.py +1 -0
- starrocks_br-0.1.0/src/starrocks_br/cli.py +385 -0
- starrocks_br-0.1.0/src/starrocks_br/concurrency.py +177 -0
- starrocks_br-0.1.0/src/starrocks_br/config.py +41 -0
- starrocks_br-0.1.0/src/starrocks_br/db.py +88 -0
- starrocks_br-0.1.0/src/starrocks_br/executor.py +245 -0
- starrocks_br-0.1.0/src/starrocks_br/health.py +34 -0
- starrocks_br-0.1.0/src/starrocks_br/history.py +93 -0
- starrocks_br-0.1.0/src/starrocks_br/labels.py +52 -0
- starrocks_br-0.1.0/src/starrocks_br/logger.py +36 -0
- starrocks_br-0.1.0/src/starrocks_br/planner.py +280 -0
- starrocks_br-0.1.0/src/starrocks_br/repository.py +36 -0
- starrocks_br-0.1.0/src/starrocks_br/restore.py +493 -0
- starrocks_br-0.1.0/src/starrocks_br/schema.py +144 -0
- starrocks_br-0.1.0/src/starrocks_br.egg-info/PKG-INFO +12 -0
- starrocks_br-0.1.0/src/starrocks_br.egg-info/SOURCES.txt +35 -0
- starrocks_br-0.1.0/src/starrocks_br.egg-info/dependency_links.txt +1 -0
- starrocks_br-0.1.0/src/starrocks_br.egg-info/entry_points.txt +2 -0
- starrocks_br-0.1.0/src/starrocks_br.egg-info/requires.txt +8 -0
- starrocks_br-0.1.0/src/starrocks_br.egg-info/top_level.txt +1 -0
- starrocks_br-0.1.0/tests/test_cli.py +631 -0
- starrocks_br-0.1.0/tests/test_concurrency.py +194 -0
- starrocks_br-0.1.0/tests/test_config.py +78 -0
- starrocks_br-0.1.0/tests/test_db.py +69 -0
- starrocks_br-0.1.0/tests/test_executor.py +669 -0
- starrocks_br-0.1.0/tests/test_health_checks.py +61 -0
- starrocks_br-0.1.0/tests/test_history.py +61 -0
- starrocks_br-0.1.0/tests/test_labels.py +153 -0
- starrocks_br-0.1.0/tests/test_logger.py +256 -0
- starrocks_br-0.1.0/tests/test_planner.py +497 -0
- starrocks_br-0.1.0/tests/test_repository_sql.py +44 -0
- starrocks_br-0.1.0/tests/test_restore.py +1105 -0
- starrocks_br-0.1.0/tests/test_schema_setup.py +194 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: starrocks-br
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: StarRocks Backup and Restore automation tool
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: click<9,>=8.1.7
|
|
7
|
+
Requires-Dist: PyYAML<7,>=6.0.1
|
|
8
|
+
Requires-Dist: mysql-connector-python<10,>=9.0.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest<9,>=8.3.2; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest-mock<4,>=3.14.0; extra == "dev"
|
|
12
|
+
Requires-Dist: pytest-cov<6,>=5.0.0; extra == "dev"
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# StarRocks Backup & Restore - CLI Usage Guide
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The StarRocks Backup & Restore tool provides production-grade automation for backup and restore operations.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
### Option 1: Using Devbox (Recommended for Development)
|
|
10
|
+
|
|
11
|
+
Devbox provides a reproducible development environment with all required tools.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Install devbox (if not already installed)
|
|
15
|
+
curl -fsSL https://get.jetpack.io/devbox | bash
|
|
16
|
+
|
|
17
|
+
# Start devbox shell - this automatically:
|
|
18
|
+
# - Installs Python 3.11 and dependencies
|
|
19
|
+
# - Creates a virtual environment (.venv)
|
|
20
|
+
# - Installs the package in editable mode
|
|
21
|
+
# - Installs development dependencies
|
|
22
|
+
devbox shell
|
|
23
|
+
|
|
24
|
+
# Once inside the devbox shell, you're ready to go:
|
|
25
|
+
starrocks-br --help
|
|
26
|
+
pytest
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Option 2: Manual Setup
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Activate virtual environment
|
|
33
|
+
source .venv/bin/activate
|
|
34
|
+
|
|
35
|
+
# The CLI is then available as: starrocks-br (if the command is missing, install the package first with: pip install -e ".[dev]")
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Configuration
|
|
39
|
+
|
|
40
|
+
Create a `config.yaml` file with your StarRocks connection details:
|
|
41
|
+
|
|
42
|
+
```yaml
|
|
43
|
+
host: "127.0.0.1"
|
|
44
|
+
port: 9030
|
|
45
|
+
user: "root"
|
|
46
|
+
database: "your_database"
|
|
47
|
+
repository: "your_repo_name"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Password Management**
|
|
51
|
+
|
|
52
|
+
The database password must be provided via the `STARROCKS_PASSWORD` environment variable. This is a security measure to prevent storing credentials in configuration files.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
export STARROCKS_PASSWORD="your_password"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Note:** The repository must be created in StarRocks using the `CREATE REPOSITORY` command before running backups. For example:
|
|
59
|
+
|
|
60
|
+
```sql
|
|
61
|
+
CREATE REPOSITORY `your_repo_name`
|
|
62
|
+
WITH S3
|
|
63
|
+
ON LOCATION "s3://your-backup-bucket/backups/"
|
|
64
|
+
PROPERTIES (
|
|
65
|
+
"aws.s3.access_key" = "your-access-key",
|
|
66
|
+
"aws.s3.secret_key" = "your-secret-key",
|
|
67
|
+
"aws.s3.endpoint" = "https://s3.amazonaws.com"
|
|
68
|
+
);
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Commands
|
|
72
|
+
|
|
73
|
+
### Initialize Schema
|
|
74
|
+
|
|
75
|
+
Before running backups, initialize the ops database and control tables:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
starrocks-br init --config config.yaml
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**What it does:**
|
|
82
|
+
- Creates `ops` database
|
|
83
|
+
- Creates `ops.table_inventory`: Inventory groups mapping to databases/tables
|
|
84
|
+
- Creates `ops.backup_history`: Backup operation history
|
|
85
|
+
- Creates `ops.restore_history`: Restore operation history
|
|
86
|
+
- Creates `ops.run_status`: Job concurrency control
|
|
87
|
+
- Creates `ops.backup_partitions`: Partition manifest for each backup (enables intelligent restore)
|
|
88
|
+
|
|
89
|
+
**Next step:** Populate `ops.table_inventory` with your backup groups. For example:
|
|
90
|
+
```sql
|
|
91
|
+
INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
|
|
92
|
+
VALUES
|
|
93
|
+
('daily_facts', 'your_db', 'fact_sales'),
|
|
94
|
+
('weekly_dims', 'your_db', 'dim_users'),
|
|
95
|
+
('weekly_dims', 'your_db', 'dim_products'),
|
|
96
|
+
('full_db_backup', 'your_db', '*'); -- Wildcard for all tables
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**Note:** If you skip running `starrocks-br init`, the ops schema will be auto-created on your first backup/restore operation (with a warning).
|
|
100
|
+
|
|
101
|
+
### Backup Commands
|
|
102
|
+
|
|
103
|
+
Backups are managed through "inventory groups" defined in `ops.table_inventory`. This provides a flexible way to schedule different backup strategies for different sets of tables.
|
|
104
|
+
|
|
105
|
+
#### 1. Full Backup
|
|
106
|
+
|
|
107
|
+
Runs a full backup for all tables within a specified inventory group.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
starrocks-br backup full --config config.yaml --group <group_name>
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Parameters:**
|
|
114
|
+
- `--group`: The inventory group to back up.
|
|
115
|
+
|
|
116
|
+
**Flow:**
|
|
117
|
+
1. Load config → verify cluster health → ensure repository exists
|
|
118
|
+
2. Reserve job slot (prevent concurrent backups)
|
|
119
|
+
3. Query `ops.table_inventory` for all tables in the specified group.
|
|
120
|
+
4. Generate a unique backup label.
|
|
121
|
+
5. Build and execute the `BACKUP` command for the resolved tables.
|
|
122
|
+
6. Poll `SHOW BACKUP` until completion and log results.
|
|
123
|
+
|
|
124
|
+
#### 2. Incremental Backup
|
|
125
|
+
|
|
126
|
+
Backs up only the partitions that have changed since the last successful full backup for a given inventory group.
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
starrocks-br backup incremental --config config.yaml --group <group_name>
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Parameters:**
|
|
133
|
+
- `--group`: The inventory group to back up.
|
|
134
|
+
- `--baseline-backup` (Optional): Specify a backup label to use as the baseline instead of the latest full backup.
|
|
135
|
+
|
|
136
|
+
**Flow:**
|
|
137
|
+
1. Load config → verify cluster health → ensure repository exists
|
|
138
|
+
2. Reserve job slot
|
|
139
|
+
3. Find the latest successful full backup for the group to use as a baseline.
|
|
140
|
+
4. Find recent partitions from `information_schema.partitions` for tables in the group.
|
|
141
|
+
5. Generate a unique backup label.
|
|
142
|
+
6. Build and execute the `BACKUP` command for the new partitions.
|
|
143
|
+
7. Poll `SHOW BACKUP` until completion and log results.
|
|
144
|
+
|
|
145
|
+
### Restore Commands
|
|
146
|
+
|
|
147
|
+
#### Intelligent Point-in-Time Restore
|
|
148
|
+
|
|
149
|
+
Restores data to a specific point in time using intelligent backup chain resolution. This command automatically determines the correct sequence of backups needed for restore.
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
starrocks-br restore \
|
|
153
|
+
--config config.yaml \
|
|
154
|
+
--target-label my_db_20251016_inc \
|
|
155
|
+
--group daily_facts \
|
|
156
|
+
--rename-suffix _restored
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
**Parameters:**
|
|
160
|
+
- `--config`: Path to config YAML file (required)
|
|
161
|
+
- `--target-label`: Backup label to restore to (required)
|
|
162
|
+
- `--group`: Optional inventory group to filter tables to restore
|
|
163
|
+
- `--rename-suffix`: Suffix for temporary tables during restore (default: `_restored`)
|
|
164
|
+
|
|
165
|
+
**How it works:**
|
|
166
|
+
- **For full backups**: Restores directly from the target backup
|
|
167
|
+
- **For incremental backups**: Automatically restores the base full backup first, then applies the incremental
|
|
168
|
+
- **Safety mechanism**: Uses temporary tables with the specified suffix, then performs an atomic rename to make the restored data live
|
|
169
|
+
|
|
170
|
+
**Two Restore Modes:**
|
|
171
|
+
- **Disaster Recovery**: Restore all tables from a backup (omit `--group` parameter)
|
|
172
|
+
- **Surgical Restore**: Restore only specific table groups (use `--group` parameter)
|
|
173
|
+
|
|
174
|
+
**Purpose of `--rename-suffix`:**
|
|
175
|
+
The restore process creates temporary tables with the specified suffix (e.g., `table_restored`) to avoid conflicts with existing tables. Once the restore is complete and verified, the tool performs atomic renames to swap the original tables with the restored data. This ensures data safety and allows for rollback if needed.
|
|
176
|
+
|
|
177
|
+
**Flow:**
|
|
178
|
+
1. Load config → verify cluster health → ensure repository exists
|
|
179
|
+
2. Find the correct restore sequence (full backup + optional incremental)
|
|
180
|
+
3. Get tables from backup manifest (optionally filtered by group)
|
|
181
|
+
4. Execute restore flow with atomic renames
|
|
182
|
+
5. Log to `ops.restore_history`
|
|
183
|
+
|
|
184
|
+
## Example Usage Scenarios
|
|
185
|
+
|
|
186
|
+
### Initial Setup
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# 1. Initialize ops schema (run once)
|
|
190
|
+
starrocks-br init --config config.yaml
|
|
191
|
+
|
|
192
|
+
# 2. Populate table inventory with your groups (run the following SQL in a StarRocks SQL client, not in the shell)
|
|
193
|
+
INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
|
|
194
|
+
VALUES
|
|
195
|
+
('daily_incrementals', 'sales_db', 'fact_orders'),
|
|
196
|
+
('weekly_full', 'sales_db', 'dim_customers'),
|
|
197
|
+
('weekly_full', 'sales_db', 'dim_products');
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Daily Incremental Backup (Mon-Sat)
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# Run via cron at 01:00
|
|
204
|
+
0 1 * * 1-6 cd /path/to/starrocks-br && . .venv/bin/activate && starrocks-br backup incremental --config config.yaml --group daily_incrementals
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Weekly Full Backup (Sunday)
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
# Run via cron at 01:00 on Sundays
|
|
211
|
+
0 1 * * 0 cd /path/to/starrocks-br && . .venv/bin/activate && starrocks-br backup full --config config.yaml --group weekly_full
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Disaster Recovery - Point-in-Time Restore
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
# Restore to a specific backup point (automatically handles full + incremental chain)
|
|
218
|
+
starrocks-br restore \
|
|
219
|
+
--config config.yaml \
|
|
220
|
+
--target-label sales_db_20251015_inc \
|
|
221
|
+
--group daily_incrementals
|
|
222
|
+
|
|
223
|
+
# Restore all tables from a full backup
|
|
224
|
+
starrocks-br restore \
|
|
225
|
+
--config config.yaml \
|
|
226
|
+
--target-label sales_db_20251014_full
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Error Handling
|
|
230
|
+
|
|
231
|
+
The CLI automatically handles:
|
|
232
|
+
|
|
233
|
+
- **Job slot conflicts**: Prevents overlapping backups/restores via `ops.run_status`
|
|
234
|
+
- **Label collisions**: Automatically appends an `_r#` suffix if the label already exists
|
|
235
|
+
- **Cluster health**: Verifies FE/BE status before starting operations
|
|
236
|
+
- **Repository validation**: Ensures repository exists and is accessible
|
|
237
|
+
- **Graceful failures**: All errors are logged to history tables with proper status
|
|
238
|
+
|
|
239
|
+
## Monitoring
|
|
240
|
+
|
|
241
|
+
All operations are logged to:
|
|
242
|
+
- `ops.backup_history`: Tracks all backup attempts with status, timestamps, and error messages
|
|
243
|
+
- `ops.restore_history`: Tracks all restore operations with verification checksums
|
|
244
|
+
- `ops.run_status`: Tracks active jobs to prevent conflicts
|
|
245
|
+
|
|
246
|
+
Query examples:
|
|
247
|
+
|
|
248
|
+
```sql
|
|
249
|
+
-- Check recent backup status
|
|
250
|
+
SELECT label, backup_type, status, started_at, finished_at
|
|
251
|
+
FROM ops.backup_history
|
|
252
|
+
ORDER BY started_at DESC
|
|
253
|
+
LIMIT 10;
|
|
254
|
+
|
|
255
|
+
-- Check for failed backups
|
|
256
|
+
SELECT label, backup_type, error_message, started_at
|
|
257
|
+
FROM ops.backup_history
|
|
258
|
+
WHERE status = 'FAILED'
|
|
259
|
+
ORDER BY started_at DESC;
|
|
260
|
+
|
|
261
|
+
-- Check active jobs
|
|
262
|
+
SELECT scope, label, state, started_at
|
|
263
|
+
FROM ops.run_status
|
|
264
|
+
WHERE state = 'ACTIVE';
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## Testing
|
|
268
|
+
|
|
269
|
+
The project includes comprehensive tests (150+ tests, 90%+ coverage).
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# Run all tests
|
|
273
|
+
pytest
|
|
274
|
+
|
|
275
|
+
# Run with coverage report
|
|
276
|
+
pytest --cov=src/starrocks_br --cov-report=term-missing
|
|
277
|
+
|
|
278
|
+
# Run specific test file
|
|
279
|
+
pytest tests/test_cli.py -v
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
## Project Status
|
|
283
|
+
|
|
284
|
+
✅ **Completed:**
|
|
285
|
+
- Config loader & validation
|
|
286
|
+
- Database connection wrapper
|
|
287
|
+
- StarRocks repository management
|
|
288
|
+
- Cluster health checks
|
|
289
|
+
- Job slot reservation (concurrency control)
|
|
290
|
+
- Label generation with collision handling
|
|
291
|
+
- Group-based backup planners for full and incremental backups
|
|
292
|
+
- Schema initialization (ops tables) with auto-creation
|
|
293
|
+
- Backup & restore history logging
|
|
294
|
+
- Backup executor with polling
|
|
295
|
+
- Intelligent point-in-time restore with automatic backup chain resolution
|
|
296
|
+
- Partition metadata tracking for backup manifests
|
|
297
|
+
- Atomic table rename for safe restore operations
|
|
298
|
+
- CLI commands (init, backup full, backup incremental, restore)
|
|
299
|
+
|
|
300
|
+
📋 **Optional (deferred):**
|
|
301
|
+
- Exponential backoff retry for job conflicts
|
|
302
|
+
- Disk space precheck (requires external monitoring)
|
|
303
|
+
- Formal runbooks and DR drill procedures
|
|
304
|
+
- Monitoring dashboards and alerting integration
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "starrocks-br"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "StarRocks Backup and Restore automation tool"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"click>=8.1.7,<9",
|
|
12
|
+
"PyYAML>=6.0.1,<7",
|
|
13
|
+
"mysql-connector-python>=9.0.0,<10",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=8.3.2,<9",
|
|
19
|
+
"pytest-mock>=3.14.0,<4",
|
|
20
|
+
"pytest-cov>=5.0.0,<6",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
starrocks-br = "starrocks_br.cli:cli"
|
|
25
|
+
|
|
26
|
+
[tool.setuptools.packages.find]
|
|
27
|
+
where = ["src"]
|
|
28
|
+
|
|
29
|
+
[tool.pytest.ini_options]
|
|
30
|
+
pythonpath = ["src"]
|
|
31
|
+
addopts = "-q --cov=src/starrocks_br --cov-report=term-missing"
|
|
32
|
+
testpaths = ["tests"]
|
|
33
|
+
|
|
34
|
+
[tool.coverage.run]
|
|
35
|
+
branch = true
|
|
36
|
+
source = ["src/starrocks_br"]
|
|
37
|
+
|
|
38
|
+
[tool.coverage.report]
|
|
39
|
+
omit = ["*/__init__.py"]
|
|
40
|
+
show_missing = true
|
|
41
|
+
skip_covered = false
|
|
42
|
+
|
|
43
|
+
[tool.black]
|
|
44
|
+
line-length = 100
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__all__ = ["cli", "config"]
|