starrocks-br 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. starrocks_br-0.2.0/README.md → starrocks_br-0.3.0/PKG-INFO +39 -64
  2. starrocks_br-0.3.0/README.md +442 -0
  3. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/pyproject.toml +2 -1
  4. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/planner.py +9 -5
  5. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/schema.py +5 -2
  6. starrocks_br-0.3.0/src/starrocks_br.egg-info/PKG-INFO +456 -0
  7. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_planner.py +11 -6
  8. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_schema_setup.py +29 -5
  9. starrocks_br-0.2.0/PKG-INFO +0 -12
  10. starrocks_br-0.2.0/src/starrocks_br.egg-info/PKG-INFO +0 -12
  11. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/setup.cfg +0 -0
  12. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/__init__.py +0 -0
  13. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/cli.py +0 -0
  14. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/concurrency.py +0 -0
  15. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/config.py +0 -0
  16. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/db.py +0 -0
  17. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/executor.py +0 -0
  18. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/health.py +0 -0
  19. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/history.py +0 -0
  20. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/labels.py +0 -0
  21. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/logger.py +0 -0
  22. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/repository.py +0 -0
  23. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/restore.py +0 -0
  24. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br/timezone.py +0 -0
  25. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br.egg-info/SOURCES.txt +0 -0
  26. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br.egg-info/dependency_links.txt +0 -0
  27. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br.egg-info/entry_points.txt +0 -0
  28. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br.egg-info/requires.txt +0 -0
  29. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/src/starrocks_br.egg-info/top_level.txt +0 -0
  30. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_cli.py +0 -0
  31. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_concurrency.py +0 -0
  32. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_config.py +0 -0
  33. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_db.py +0 -0
  34. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_executor.py +0 -0
  35. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_health_checks.py +0 -0
  36. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_history.py +0 -0
  37. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_labels.py +0 -0
  38. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_logger.py +0 -0
  39. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_repository_sql.py +0 -0
  40. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_restore.py +0 -0
  41. {starrocks_br-0.2.0 → starrocks_br-0.3.0}/tests/test_timezone.py +0 -0
@@ -1,3 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: starrocks-br
3
+ Version: 0.3.0
4
+ Summary: StarRocks Backup and Restore automation tool
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: click<9,>=8.1.7
8
+ Requires-Dist: PyYAML<7,>=6.0.1
9
+ Requires-Dist: mysql-connector-python<10,>=9.0.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest<9,>=8.3.2; extra == "dev"
12
+ Requires-Dist: pytest-mock<4,>=3.14.0; extra == "dev"
13
+ Requires-Dist: pytest-cov<6,>=5.0.0; extra == "dev"
14
+
1
15
  # StarRocks Backup & Restore - CLI Usage Guide
2
16
 
3
17
  ## Overview
@@ -6,6 +20,8 @@ The StarRocks Backup & Restore tool provides production-grade automation for bac
6
20
 
7
21
  **Important:** This tool requires StarRocks 3.5 or later. Earlier versions are not supported due to differences in the `SHOW FRONTENDS` and `SHOW BACKENDS` command output formats, which are used for cluster health checks.
8
22
 
23
+ **📋 [View Release Notes & Changelog](CHANGELOG.md)**
24
+
9
25
  ## Summary
10
26
 
11
27
  - [Installation](#installation)
@@ -17,8 +33,7 @@ The StarRocks Backup & Restore tool provides production-grade automation for bac
17
33
  - [Example Usage Scenarios](#example-usage-scenarios)
18
34
  - [Error Handling](#error-handling)
19
35
  - [Monitoring](#monitoring)
20
- - [Testing](#testing)
21
- - [Project Status](#project-status)
36
+ - [Changelog](CHANGELOG.md)
22
37
 
23
38
  ## Installation
24
39
 
@@ -33,7 +48,7 @@ source .venv/bin/activate # On Linux/Mac
33
48
  # .venv\Scripts\activate # On Windows
34
49
 
35
50
  # Install the package from PyPI
36
- pip install starrocks-br==0.1.0
51
+ pip install starrocks-br
37
52
 
38
53
  # Verify the installation
39
54
  starrocks-br --help
@@ -41,40 +56,38 @@ starrocks-br --help
41
56
 
42
57
  **Note:** Always activate the virtual environment before using the tool. The `starrocks-br` command will only be available when the virtual environment is activated.
43
58
 
44
- ### Option 2: Standalone Executable (Alternative if PyPI Installation Fails)
59
+ ### Option 2: Download Pre-built Standalone Executable
45
60
 
46
- If you encounter problems when installing via PyPI (often due to `mysql-connector-python` native extension issues), you can build a standalone executable that bundles all dependencies.
61
+ If you prefer not to manage Python environments, you can download a bundled executable that includes the Python runtime and all dependencies.
47
62
 
48
- **Note:** This requires cloning the repository first.
63
+ 1. **Download the artifact** for your platform from the latest [Build Executables workflow run](https://github.com/deep-bi/starrocks-br/actions/workflows/build-executables.yml) (Artifacts section).
64
+ - `starrocks-br-linux-x86_64` → Linux (Intel/AMD)
65
+ - `starrocks-br-windows-x86_64` → Windows (Intel/AMD)
66
+ - `starrocks-br-macos-arm64` → macOS on Apple Silicon (M1/M2/M3)
67
+ - `starrocks-br-macos-x86_64` → macOS on Intel chips
49
68
 
50
- ```bash
51
- # Clone the repository
52
- git clone https://github.com/deep-bi/starrocks-br
53
- cd starrocks-br
69
+ 2. **Extract the ZIP file** (artifacts are delivered as ZIPs).
54
70
 
55
- # Create and activate virtual environment
56
- python3 -m venv .venv
57
- source .venv/bin/activate # On Linux/Mac
58
- # .venv\Scripts\activate # On Windows
59
-
60
- # Install dependencies
61
- pip install -e .
71
+ 3. **Make the file executable (Linux/macOS):**
72
+ ```bash
73
+ chmod +x starrocks-br
74
+ ```
62
75
 
63
- # Build the standalone executable
64
- ./build_executable.sh
76
+ 4. **Run it directly:**
77
+ ```bash
78
+ ./starrocks-br --help # Linux/macOS
79
+ .\starrocks-br.exe --help # Windows (PowerShell)
80
+ ```
65
81
 
66
- # The executable will be created in: dist/starrocks-br
67
- # You can now distribute or use this executable directly
68
- ./dist/starrocks-br --help
69
- ```
82
+ 5. **Keep it updated:** Download the latest artifact whenever a new release is published. (Future releases will bundle executables automatically.)
70
83
 
71
- **Note:** The executable is platform-specific. Build it on the same OS/architecture where you'll use it, or build it on the target machine.
84
+ **Need to build it yourself?** Clone the repo and run `./build_executable.sh` to recreate the executable locally (see script for details).
72
85
 
73
86
  ### Option 3: Using Devbox (Recommended for Development)
74
87
 
75
88
  **Note:** This requires cloning the repository first.
76
89
 
77
- Devbox provides a reproducible development environment with all required tools.
90
+ [Devbox](https://www.jetify.com/devbox) is a reproducible development environment that installs all required tools (Python, dependencies, virtualenv) in one step.
78
91
 
79
92
  ```bash
80
93
  # Clone the repository
@@ -115,7 +128,7 @@ pip install -e ".[dev]"
115
128
 
116
129
  ## Quick Start
117
130
 
118
- After installing from PyPI, follow these steps:
131
+ After installing the CLI (via PyPI, executable download, Devbox, or manual setup), follow these steps:
119
132
 
120
133
  1. **Activate your virtual environment** (if not already active):
121
134
  ```bash
@@ -441,41 +454,3 @@ FROM ops.run_status
441
454
  WHERE state = 'ACTIVE';
442
455
  ```
443
456
 
444
- ## Testing
445
-
446
- The project includes comprehensive tests (150+ tests, 90%+ coverage).
447
-
448
- ```bash
449
- # Run all tests
450
- pytest
451
-
452
- # Run with coverage report
453
- pytest --cov=src/starrocks_br --cov-report=term-missing
454
-
455
- # Run specific test file
456
- pytest tests/test_cli.py -v
457
- ```
458
-
459
- ## Project Status
460
-
461
- ✅ **Completed:**
462
- - Config loader & validation
463
- - Database connection wrapper
464
- - StarRocks repository management
465
- - Cluster health checks
466
- - Job slot reservation (concurrency control)
467
- - Label generation with collision handling
468
- - Group-based backup planners for full and incremental backups
469
- - Schema initialization (ops tables) with auto-creation
470
- - Backup & restore history logging
471
- - Backup executor with polling
472
- - Intelligent point-in-time restore with automatic backup chain resolution
473
- - Partition metadata tracking for backup manifests
474
- - Atomic table rename for safe restore operations
475
- - CLI commands (init, backup full, backup incremental, restore)
476
-
477
- 📋 **Optional (deferred):**
478
- - Exponential backoff retry for job conflicts
479
- - Disk space precheck (requires external monitoring)
480
- - Formal runbooks and DR drill procedures
481
- - Monitoring dashboards and alerting integration
@@ -0,0 +1,442 @@
1
+ # StarRocks Backup & Restore - CLI Usage Guide
2
+
3
+ ## Overview
4
+
5
+ The StarRocks Backup & Restore tool provides production-grade automation for backup and restore operations.
6
+
7
+ **Important:** This tool requires StarRocks 3.5 or later. Earlier versions are not supported due to differences in the `SHOW FRONTENDS` and `SHOW BACKENDS` command output formats, which are used for cluster health checks.
8
+
9
+ **📋 [View Release Notes & Changelog](CHANGELOG.md)**
10
+
11
+ ## Summary
12
+
13
+ - [Installation](#installation)
14
+ - [Quick Start](#quick-start)
15
+ - [Configuration](#configuration)
16
+ - [Password Management](#password-management)
17
+ - [Connecting with TLS/SSL](#connecting-with-tlsssl)
18
+ - [Commands](#commands)
19
+ - [Example Usage Scenarios](#example-usage-scenarios)
20
+ - [Error Handling](#error-handling)
21
+ - [Monitoring](#monitoring)
22
+ - [Changelog](CHANGELOG.md)
23
+
24
+ ## Installation
25
+
26
+ ### Option 1: Install from PyPI (Recommended for Production)
27
+
28
+ We recommend using a virtual environment to ensure proper script availability and dependency isolation:
29
+
30
+ ```bash
31
+ # Create and activate a virtual environment
32
+ python3 -m venv .venv
33
+ source .venv/bin/activate # On Linux/Mac
34
+ # .venv\Scripts\activate # On Windows
35
+
36
+ # Install the package from PyPI
37
+ pip install starrocks-br
38
+
39
+ # Verify the installation
40
+ starrocks-br --help
41
+ ```
42
+
43
+ **Note:** Always activate the virtual environment before using the tool. The `starrocks-br` command will only be available when the virtual environment is activated.
44
+
45
+ ### Option 2: Download Pre-built Standalone Executable
46
+
47
+ If you prefer not to manage Python environments, you can download a bundled executable that includes the Python runtime and all dependencies.
48
+
49
+ 1. **Download the artifact** for your platform from the latest [Build Executables workflow run](https://github.com/deep-bi/starrocks-br/actions/workflows/build-executables.yml) (Artifacts section).
50
+ - `starrocks-br-linux-x86_64` → Linux (Intel/AMD)
51
+ - `starrocks-br-windows-x86_64` → Windows (Intel/AMD)
52
+ - `starrocks-br-macos-arm64` → macOS on Apple Silicon (M1/M2/M3)
53
+ - `starrocks-br-macos-x86_64` → macOS on Intel chips
54
+
55
+ 2. **Extract the ZIP file** (artifacts are delivered as ZIPs).
56
+
57
+ 3. **Make the file executable (Linux/macOS):**
58
+ ```bash
59
+ chmod +x starrocks-br
60
+ ```
61
+
62
+ 4. **Run it directly:**
63
+ ```bash
64
+ ./starrocks-br --help # Linux/macOS
65
+ .\starrocks-br.exe --help # Windows (PowerShell)
66
+ ```
67
+
68
+ 5. **Keep it updated:** Download the latest artifact whenever a new release is published. (Future releases will bundle executables automatically.)
69
+
70
+ **Need to build it yourself?** Clone the repo and run `./build_executable.sh` to recreate the executable locally (see script for details).
71
+
72
+ ### Option 3: Using Devbox (Recommended for Development)
73
+
74
+ **Note:** This requires cloning the repository first.
75
+
76
+ [Devbox](https://www.jetify.com/devbox) is a reproducible development environment that installs all required tools (Python, dependencies, virtualenv) in one step.
77
+
78
+ ```bash
79
+ # Clone the repository
80
+ git clone https://github.com/deep-bi/starrocks-br
81
+ cd starrocks-br
82
+
83
+ # Install devbox (if not already installed)
84
+ curl -fsSL https://get.jetpack.io/devbox | bash
85
+
86
+ # Start devbox shell - this automatically:
87
+ # - Installs Python 3.11 and dependencies
88
+ # - Creates a virtual environment (.venv)
89
+ # - Installs the package in editable mode
90
+ # - Installs development dependencies
91
+ devbox shell
92
+
93
+ # Once inside the devbox shell, you're ready to go:
94
+ starrocks-br --help
95
+ pytest
96
+ ```
97
+
98
+ ### Option 4: Manual Development Setup
99
+
100
+ ```bash
101
+ # Clone the repository
102
+ git clone https://github.com/deep-bi/starrocks-br
103
+ cd starrocks-br
104
+
105
+ # Create and activate virtual environment
106
+ python3 -m venv .venv
107
+ source .venv/bin/activate
108
+
109
+ # Install in editable mode with development dependencies
110
+ pip install -e ".[dev]"
111
+
112
+ # The CLI is now available as: starrocks-br
113
+ ```
114
+
115
+ ## Quick Start
116
+
117
+ After installing the CLI (via PyPI, executable download, Devbox, or manual setup), follow these steps:
118
+
119
+ 1. **Activate your virtual environment** (if not already active; skip this step when using the standalone executable):
120
+ ```bash
121
+ source .venv/bin/activate # On Linux/Mac
122
+ # .venv\Scripts\activate # On Windows
123
+ ```
124
+
125
+ 2. **Verify installation:**
126
+ ```bash
127
+ starrocks-br --help
128
+ ```
129
+
130
+ 3. **Create your `config.yaml` file** (see Configuration section below)
131
+
132
+ 4. **Set your password as an environment variable:**
133
+ ```bash
134
+ export STARROCKS_PASSWORD="your_password"
135
+ ```
136
+
137
+ On Windows (PowerShell):
138
+ ```powershell
139
+ $env:STARROCKS_PASSWORD="your_password"
140
+ ```
141
+
142
+ On Windows (Command Prompt):
143
+ ```cmd
144
+ set STARROCKS_PASSWORD=your_password
145
+ ```
146
+
147
+ 5. **Initialize the ops schema:**
148
+ ```bash
149
+ starrocks-br init --config config.yaml
150
+ ```
151
+
152
+ 6. **Start using the tool** - see Commands section below for details
153
+
154
+ ## Configuration
155
+
156
+ **Important:** After installing the package, you need to create your own `config.yaml` file. This file is **not included in the package** - each user creates it with their own StarRocks connection details. You can place it anywhere and reference it using the `--config` parameter.
157
+
158
+ Create a `config.yaml` file in your working directory (or any location you prefer) with your StarRocks connection details:
159
+
160
+ ```yaml
161
+ host: "127.0.0.1"
162
+ port: 9030
163
+ user: "root"
164
+ database: "your_database"
165
+ repository: "your_repo_name"
166
+ ```
167
+
168
+ ### Password Management
169
+
170
+ The database password must be provided via the `STARROCKS_PASSWORD` environment variable. This is a security measure to prevent storing credentials in configuration files.
171
+
172
+ ```bash
173
+ export STARROCKS_PASSWORD="your_password"
174
+ ```
175
+
176
+ ### Connecting with TLS/SSL
177
+
178
+ The tool can make secure connections to StarRocks using TLS. Add an optional `tls` section to your `config.yaml` when you need encryption.
179
+
180
+ #### Scenario 1: Server Authentication (Most Common)
181
+
182
+ Use this setup when the client only needs to verify the StarRocks server certificate.
183
+
184
+ ```yaml
185
+ host: "127.0.0.1"
186
+ port: 9030
187
+ user: "root"
188
+ database: "your_database"
189
+ repository: "your_repo_name"
190
+
191
+ tls:
192
+ enabled: true
193
+ ca_cert: "/path/to/ca.pem"
194
+ ```
195
+
196
+ - `enabled`: Turns TLS on or off.
197
+ - `ca_cert`: Certificate Authority file used to validate the server certificate.
198
+ - `verify_server_cert` (optional, default `true`): Disable only if you need to skip certificate validation.
199
+
200
+ #### Scenario 2: Mutual TLS (mTLS)
201
+
202
+ Use this when both the client and server must present certificates.
203
+
204
+ ```yaml
205
+ host: "127.0.0.1"
206
+ port: 9030
207
+ user: "root"
208
+ database: "your_database"
209
+ repository: "your_repo_name"
210
+
211
+ tls:
212
+ enabled: true
213
+ ca_cert: "/path/to/ca.pem"
214
+ client_cert: "/path/to/client-cert.pem"
215
+ client_key: "/path/to/client-key.pem"
216
+ ```
217
+
218
+ - `client_cert`: Client certificate presented to the server.
219
+ - `client_key`: Private key paired with the client certificate.
220
+
221
+ Regardless of the scenario, the connection defaults to modern TLS versions (`TLSv1.2`, `TLSv1.3`). Provide a `tls_versions` list if you need different protocol settings.
222
+
223
+ **Note:** The repository must be created in StarRocks using the `CREATE REPOSITORY` command before running backups. For example:
224
+
225
+ ```sql
226
+ CREATE REPOSITORY `your_repo_name`
227
+ WITH S3
228
+ ON LOCATION "s3://your-backup-bucket/backups/"
229
+ PROPERTIES (
230
+ "aws.s3.access_key" = "your-access-key",
231
+ "aws.s3.secret_key" = "your-secret-key",
232
+ "aws.s3.endpoint" = "https://s3.amazonaws.com"
233
+ );
234
+ ```
235
+
236
+ ## Commands
237
+
238
+ ### Initialize Schema
239
+
240
+ Before running backups, initialize the ops database and control tables:
241
+
242
+ ```bash
243
+ starrocks-br init --config config.yaml
244
+ ```
245
+
246
+ **What it does:**
247
+ - Creates `ops` database
248
+ - Creates `ops.table_inventory`: Inventory groups mapping to databases/tables
249
+ - Creates `ops.backup_history`: Backup operation history
250
+ - Creates `ops.restore_history`: Restore operation history
251
+ - Creates `ops.run_status`: Job concurrency control
252
+ - Creates `ops.backup_partitions`: Partition manifest for each backup (enables intelligent restore)
253
+
254
+ **Next step:** Populate `ops.table_inventory` with your backup groups. For example:
255
+ ```sql
256
+ INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
257
+ VALUES
258
+ ('daily_facts', 'your_db', 'fact_sales'),
259
+ ('weekly_dims', 'your_db', 'dim_users'),
260
+ ('weekly_dims', 'your_db', 'dim_products'),
261
+ ('full_db_backup', 'your_db', '*'); -- Wildcard for all tables
262
+ ```
263
+
264
+ **Note:** If you skip the `init` command, the ops schema will be auto-created on your first backup/restore operation (with a warning).
265
+
266
+ ### Backup Commands
267
+
268
+ Backups are managed through "inventory groups" defined in `ops.table_inventory`. This provides a flexible way to schedule different backup strategies for different sets of tables.
269
+
270
+ #### 1. Full Backup
271
+
272
+ Runs a full backup for all tables within a specified inventory group.
273
+
274
+ ```bash
275
+ starrocks-br backup full --config config.yaml --group <group_name>
276
+ ```
277
+
278
+ **Parameters:**
279
+ - `--group`: The inventory group to back up.
280
+
281
+ **Internal flow:**
282
+ 1. Load config → verify cluster health → ensure repository exists
283
+ 2. Reserve job slot (prevent concurrent backups)
284
+ 3. Query `ops.table_inventory` for all tables in the specified group.
285
+ 4. Generate a unique backup label.
286
+ 5. Build and execute the `BACKUP` command for the resolved tables.
287
+ 6. Poll `SHOW BACKUP` until completion and log results.
288
+
289
+ #### 2. Incremental Backup
290
+
291
+ Backs up only the partitions that have changed since the last successful full backup for a given inventory group.
292
+
293
+ ```bash
294
+ starrocks-br backup incremental --config config.yaml --group <group_name>
295
+ ```
296
+
297
+ **Parameters:**
298
+ - `--group`: The inventory group to back up.
299
+ - `--baseline-backup` (Optional): Specify a backup label to use as the baseline instead of the latest full backup.
300
+
301
+ **Internal flow:**
302
+ 1. Load config → verify cluster health → ensure repository exists
303
+ 2. Reserve job slot
304
+ 3. Find the latest successful full backup for the group to use as a baseline.
305
+ 4. Find recent partitions from `information_schema.partitions` for tables in the group.
306
+ 5. Generate a unique backup label.
307
+ 6. Build and execute the `BACKUP` command for the new partitions.
308
+ 7. Poll `SHOW BACKUP` until completion and log results.
309
+
310
+ ### Restore Commands
311
+
312
+ #### Intelligent Point-in-Time Restore
313
+
314
+ Restores data to a specific point in time using intelligent backup chain resolution. This command automatically determines the correct sequence of backups needed for restore.
315
+
316
+ ```bash
317
+ starrocks-br restore \
318
+ --config config.yaml \
319
+ --target-label my_db_20251016_inc \
320
+ --group daily_facts \
321
+ --rename-suffix _restored
322
+ ```
323
+
324
+ **Parameters:**
325
+ - `--config`: Path to config YAML file (required)
326
+ - `--target-label`: Backup label to restore to (required)
327
+ - `--group`: Optional inventory group to filter tables to restore (cannot be used with `--table`)
328
+ - `--table`: Optional table name to restore (table name only, database comes from config). Cannot be used with `--group`
329
+ - `--rename-suffix`: Suffix for temporary tables during restore (default: `_restored`)
330
+
331
+ **How it works:**
332
+ - **For full backups**: Restores directly from the target backup
333
+ - **For incremental backups**: Automatically restores the base full backup first, then applies the incremental
334
+ - **Safety mechanism**: Uses temporary tables with the specified suffix, then performs atomic rename to make restored data live
335
+
336
+ **Three Restore Modes:**
337
+ - **Disaster Recovery**: Restore all tables from a backup (omit both `--group` and `--table` parameters)
338
+ - **Surgical Restore by Group**: Restore only specific table groups (use `--group` parameter)
339
+ - **Single Table Restore**: Restore a specific table (use `--table` parameter). The table name should not include the database prefix - the database comes from the config file.
340
+
341
+ **Table Name Format:**
342
+ When using `--table`, provide only the table name (e.g., `fact_sales`), not `database.table_name`. The database is taken from the `database` field in your config file. For multiple tables, set up an inventory group and use `--group` instead.
343
+
344
+ **Purpose of `--rename-suffix`:**
345
+ The restore process creates temporary tables with the specified suffix (e.g., `table_restored`) to avoid conflicts with existing tables. Once the restore is complete and verified, the tool performs atomic renames to swap the original tables with the restored data. This ensures data safety and allows for rollback if needed.
346
+
347
+ **Internal flow:**
348
+ 1. Load config → verify cluster health → ensure repository exists
349
+ 2. Find the correct restore sequence (full backup + optional incremental)
350
+ 3. Get tables from backup manifest (optionally filtered by group)
351
+ 4. Execute restore flow with atomic renames
352
+ 5. Log to `ops.restore_history`
353
+
354
+ ## Example Usage Scenarios
355
+
356
+ ### Initial Setup
357
+
358
+ ```bash
359
+ # 1. Initialize ops schema (run once)
360
+ starrocks-br init --config config.yaml
361
+
362
+ # 2. Populate table inventory with your groups (in StarRocks)
363
+ INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
364
+ VALUES
365
+ ('daily_incrementals', 'sales_db', 'fact_orders'),
366
+ ('weekly_full', 'sales_db', 'dim_customers'),
367
+ ('weekly_full', 'sales_db', 'dim_products');
368
+ ```
369
+
370
+ ### Daily Incremental Backup (Mon-Sat)
371
+
372
+ ```bash
373
+ # Run via cron at 01:00
374
+ 0 1 * * 1-6 cd /path/to/starrocks-br && source .venv/bin/activate && starrocks-br backup incremental --config config.yaml --group daily_incrementals
375
+ ```
376
+
377
+ ### Weekly Full Backup (Sunday)
378
+
379
+ ```bash
380
+ # Run via cron at 01:00 on Sundays
381
+ 0 1 * * 0 cd /path/to/starrocks-br && source .venv/bin/activate && starrocks-br backup full --config config.yaml --group weekly_full
382
+ ```
383
+
384
+ ### Disaster Recovery - Point-in-Time Restore
385
+
386
+ ```bash
387
+ # Restore to a specific backup point (automatically handles full + incremental chain)
388
+ starrocks-br restore \
389
+ --config config.yaml \
390
+ --target-label sales_db_20251015_inc \
391
+ --group daily_facts
392
+
393
+ # Restore all tables from a full backup
394
+ starrocks-br restore \
395
+ --config config.yaml \
396
+ --target-label sales_db_20251014_full
397
+
398
+ # Restore a single table from a backup
399
+ starrocks-br restore \
400
+ --config config.yaml \
401
+ --target-label sales_db_20251015_inc \
402
+ --table fact_sales
403
+ ```
404
+
405
+ ## Error Handling
406
+
407
+ The CLI automatically handles:
408
+
409
+ - **Job slot conflicts**: Prevents overlapping backups/restores via `ops.run_status`
410
+ - **Label collisions**: Automatically appends an `_r#` suffix if the label already exists
411
+ - **Cluster health**: Verifies FE/BE status before starting operations
412
+ - **Repository validation**: Ensures repository exists and is accessible
413
+ - **Graceful failures**: All errors are logged to history tables with proper status
414
+
415
+ ## Monitoring
416
+
417
+ All operations are logged to:
418
+ - `ops.backup_history`: Tracks all backup attempts with status, timestamps, and error messages
419
+ - `ops.restore_history`: Tracks all restore operations with verification checksums
420
+ - `ops.run_status`: Tracks active jobs to prevent conflicts
421
+
422
+ Query examples:
423
+
424
+ ```sql
425
+ -- Check recent backup status
426
+ SELECT label, backup_type, status, started_at, finished_at
427
+ FROM ops.backup_history
428
+ ORDER BY started_at DESC
429
+ LIMIT 10;
430
+
431
+ -- Check for failed backups
432
+ SELECT label, backup_type, error_message, started_at
433
+ FROM ops.backup_history
434
+ WHERE status = 'FAILED'
435
+ ORDER BY started_at DESC;
436
+
437
+ -- Check active jobs
438
+ SELECT scope, label, state, started_at
439
+ FROM ops.run_status
440
+ WHERE state = 'ACTIVE';
441
+ ```
442
+
@@ -4,8 +4,9 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "starrocks-br"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "StarRocks Backup and Restore automation tool"
9
+ readme = "README.md"
9
10
  requires-python = ">=3.9"
10
11
  dependencies = [
11
12
  "click>=8.1.7,<9",
@@ -1,5 +1,6 @@
1
1
  from typing import List, Dict, Optional
2
2
  import datetime
3
+ import hashlib
3
4
 
4
5
  from starrocks_br import logger, timezone
5
6
 
@@ -232,7 +233,7 @@ def build_full_backup_command(db, group_name: str, repository: str, label: str,
232
233
 
233
234
  def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -> None:
234
235
  """Record partition metadata for a backup in ops.backup_partitions table.
235
-
236
+
236
237
  Args:
237
238
  db: Database connection
238
239
  label: Backup label
@@ -240,12 +241,15 @@ def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -
240
241
  """
241
242
  if not partitions:
242
243
  return
243
-
244
+
244
245
  for partition in partitions:
246
+ composite_key = f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
247
+ key_hash = hashlib.md5(composite_key.encode('utf-8')).hexdigest()
248
+
245
249
  db.execute(f"""
246
- INSERT INTO ops.backup_partitions
247
- (label, database_name, table_name, partition_name)
248
- VALUES ('{label}', '{partition['database']}', '{partition['table']}', '{partition['partition_name']}')
250
+ INSERT INTO ops.backup_partitions
251
+ (key_hash, label, database_name, table_name, partition_name)
252
+ VALUES ('{key_hash}', '{label}', '{partition['database']}', '{partition['table']}', '{partition['partition_name']}')
249
253
  """)
250
254
 
251
255
 
@@ -71,8 +71,9 @@ def get_table_inventory_schema() -> str:
71
71
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
72
72
  updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
73
73
  )
74
- PRIMARY KEY (inventory_group, database_name, table_name)
74
+ UNIQUE KEY (inventory_group, database_name, table_name)
75
75
  COMMENT "Inventory groups mapping to databases/tables (supports '*' wildcard)"
76
+ DISTRIBUTED BY HASH(inventory_group)
76
77
  """
77
78
 
78
79
 
@@ -133,12 +134,14 @@ def get_backup_partitions_schema() -> str:
133
134
  """Get CREATE TABLE statement for backup_partitions."""
134
135
  return """
135
136
  CREATE TABLE IF NOT EXISTS ops.backup_partitions (
137
+ key_hash STRING NOT NULL COMMENT "MD5 hash of composite key (label, database_name, table_name, partition_name)",
136
138
  label STRING NOT NULL COMMENT "The backup label this partition belongs to. FK to ops.backup_history.label.",
137
139
  database_name STRING NOT NULL COMMENT "The name of the database the partition belongs to.",
138
140
  table_name STRING NOT NULL COMMENT "The name of the table the partition belongs to.",
139
141
  partition_name STRING NOT NULL COMMENT "The name of the specific partition.",
140
142
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT "Timestamp when this record was created."
141
143
  )
142
- PRIMARY KEY (label, database_name, table_name, partition_name)
144
+ PRIMARY KEY (key_hash)
143
145
  COMMENT "Tracks every partition included in a backup snapshot."
146
+ DISTRIBUTED BY HASH(key_hash)
144
147
  """