starrocks-br 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/cli.py +307 -217
- starrocks_br/concurrency.py +50 -50
- starrocks_br/config.py +31 -23
- starrocks_br/db.py +38 -38
- starrocks_br/error_handler.py +265 -0
- starrocks_br/exceptions.py +93 -0
- starrocks_br/executor.py +102 -73
- starrocks_br/health.py +1 -6
- starrocks_br/history.py +5 -8
- starrocks_br/labels.py +14 -10
- starrocks_br/logger.py +45 -15
- starrocks_br/planner.py +112 -111
- starrocks_br/repository.py +3 -5
- starrocks_br/restore.py +241 -191
- starrocks_br/schema.py +15 -14
- starrocks_br/timezone.py +29 -31
- starrocks_br/utils.py +86 -0
- starrocks_br-0.5.0.dist-info/METADATA +153 -0
- starrocks_br-0.5.0.dist-info/RECORD +23 -0
- starrocks_br-0.3.0.dist-info/METADATA +0 -456
- starrocks_br-0.3.0.dist-info/RECORD +0 -20
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.5.0.dist-info}/WHEEL +0 -0
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.5.0.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.5.0.dist-info}/top_level.txt +0 -0
starrocks_br/cli.py
CHANGED
|
@@ -1,23 +1,36 @@
|
|
|
1
|
-
import click
|
|
2
1
|
import os
|
|
3
2
|
import sys
|
|
4
|
-
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from . import (
|
|
7
|
+
concurrency,
|
|
8
|
+
db,
|
|
9
|
+
error_handler,
|
|
10
|
+
exceptions,
|
|
11
|
+
executor,
|
|
12
|
+
health,
|
|
13
|
+
labels,
|
|
14
|
+
logger,
|
|
15
|
+
planner,
|
|
16
|
+
repository,
|
|
17
|
+
restore,
|
|
18
|
+
schema,
|
|
19
|
+
)
|
|
5
20
|
from . import config as config_module
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def _handle_snapshot_exists_error(error_details: dict, label: str, config: str, repository: str, backup_type: str, group: str, baseline_backup: str = None) -> None:
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _handle_snapshot_exists_error(
|
|
24
|
+
error_details: dict,
|
|
25
|
+
label: str,
|
|
26
|
+
config: str,
|
|
27
|
+
repository: str,
|
|
28
|
+
backup_type: str,
|
|
29
|
+
group: str,
|
|
30
|
+
baseline_backup: str = None,
|
|
31
|
+
) -> None:
|
|
19
32
|
"""Handle snapshot_exists error by providing helpful guidance to the user.
|
|
20
|
-
|
|
33
|
+
|
|
21
34
|
Args:
|
|
22
35
|
error_details: Error details dict containing error_type and snapshot_name
|
|
23
36
|
label: The backup label that was generated
|
|
@@ -27,7 +40,7 @@ def _handle_snapshot_exists_error(error_details: dict, label: str, config: str,
|
|
|
27
40
|
group: Inventory group name
|
|
28
41
|
baseline_backup: Optional baseline backup label (for incremental backups)
|
|
29
42
|
"""
|
|
30
|
-
snapshot_name = error_details.get(
|
|
43
|
+
snapshot_name = error_details.get("snapshot_name", label)
|
|
31
44
|
logger.error(f"Snapshot '{snapshot_name}' already exists in the repository.")
|
|
32
45
|
logger.info("")
|
|
33
46
|
logger.info("This typically happens when:")
|
|
@@ -35,52 +48,65 @@ def _handle_snapshot_exists_error(error_details: dict, label: str, config: str,
|
|
|
35
48
|
logger.info(" • The backup completed on the server, but backup_history wasn't updated")
|
|
36
49
|
logger.info("")
|
|
37
50
|
logger.info("To resolve this, retry the backup with a custom label using --name:")
|
|
38
|
-
|
|
39
|
-
if backup_type ==
|
|
51
|
+
|
|
52
|
+
if backup_type == "incremental":
|
|
40
53
|
retry_cmd = f" starrocks-br backup incremental --config {config} --group {group} --name {snapshot_name}_retry"
|
|
41
54
|
if baseline_backup:
|
|
42
55
|
retry_cmd += f" --baseline-backup {baseline_backup}"
|
|
43
56
|
logger.info(retry_cmd)
|
|
44
57
|
else:
|
|
45
|
-
logger.info(
|
|
46
|
-
|
|
58
|
+
logger.info(
|
|
59
|
+
f" starrocks-br backup full --config {config} --group {group} --name {snapshot_name}_retry"
|
|
60
|
+
)
|
|
61
|
+
|
|
47
62
|
logger.info("")
|
|
48
63
|
logger.tip("You can verify the existing backup by checking the repository or running:")
|
|
49
64
|
logger.tip(f" SHOW SNAPSHOT ON {repository} WHERE Snapshot = '{snapshot_name}'")
|
|
50
65
|
|
|
51
66
|
|
|
52
67
|
@click.group()
|
|
53
|
-
|
|
68
|
+
@click.option("--verbose", is_flag=True, help="Enable verbose debug logging")
|
|
69
|
+
@click.pass_context
|
|
70
|
+
def cli(ctx, verbose):
|
|
54
71
|
"""StarRocks Backup & Restore automation tool."""
|
|
55
|
-
|
|
72
|
+
ctx.ensure_object(dict)
|
|
73
|
+
ctx.obj["verbose"] = verbose
|
|
56
74
|
|
|
75
|
+
if verbose:
|
|
76
|
+
import logging
|
|
57
77
|
|
|
58
|
-
|
|
59
|
-
|
|
78
|
+
logger.setup_logging(level=logging.DEBUG)
|
|
79
|
+
logger.debug("Verbose logging enabled")
|
|
80
|
+
else:
|
|
81
|
+
logger.setup_logging()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@cli.command("init")
|
|
85
|
+
@click.option("--config", required=True, help="Path to config YAML file")
|
|
60
86
|
def init(config):
|
|
61
87
|
"""Initialize ops database and control tables.
|
|
62
|
-
|
|
88
|
+
|
|
63
89
|
Creates the ops database with required tables:
|
|
64
90
|
- ops.table_inventory: Inventory groups mapping to databases/tables
|
|
65
91
|
- ops.backup_history: Backup operation history
|
|
66
92
|
- ops.restore_history: Restore operation history
|
|
67
93
|
- ops.run_status: Job concurrency control
|
|
68
|
-
|
|
94
|
+
|
|
69
95
|
Run this once before using backup/restore commands.
|
|
70
96
|
"""
|
|
71
97
|
try:
|
|
72
98
|
cfg = config_module.load_config(config)
|
|
73
99
|
config_module.validate_config(cfg)
|
|
74
|
-
|
|
100
|
+
|
|
75
101
|
database = db.StarRocksDB(
|
|
76
|
-
host=cfg[
|
|
77
|
-
port=cfg[
|
|
78
|
-
user=cfg[
|
|
79
|
-
password=os.getenv(
|
|
80
|
-
database=cfg[
|
|
81
|
-
tls_config=cfg.get(
|
|
102
|
+
host=cfg["host"],
|
|
103
|
+
port=cfg["port"],
|
|
104
|
+
user=cfg["user"],
|
|
105
|
+
password=os.getenv("STARROCKS_PASSWORD"),
|
|
106
|
+
database=cfg["database"],
|
|
107
|
+
tls_config=cfg.get("tls"),
|
|
82
108
|
)
|
|
83
|
-
|
|
109
|
+
|
|
84
110
|
with database:
|
|
85
111
|
logger.info("Initializing ops schema...")
|
|
86
112
|
schema.initialize_ops_schema(database)
|
|
@@ -95,8 +121,10 @@ def init(config):
|
|
|
95
121
|
logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
|
|
96
122
|
logger.info("")
|
|
97
123
|
logger.info("2. Run your first backup:")
|
|
98
|
-
logger.info(
|
|
99
|
-
|
|
124
|
+
logger.info(
|
|
125
|
+
" starrocks-br backup incremental --group my_daily_incremental --config config.yaml"
|
|
126
|
+
)
|
|
127
|
+
|
|
100
128
|
except FileNotFoundError as e:
|
|
101
129
|
logger.error(f"Config file not found: {e}")
|
|
102
130
|
sys.exit(1)
|
|
@@ -114,117 +142,139 @@ def backup():
|
|
|
114
142
|
pass
|
|
115
143
|
|
|
116
144
|
|
|
117
|
-
@backup.command(
|
|
118
|
-
@click.option(
|
|
119
|
-
@click.option(
|
|
120
|
-
|
|
121
|
-
|
|
145
|
+
@backup.command("incremental")
|
|
146
|
+
@click.option("--config", required=True, help="Path to config YAML file")
|
|
147
|
+
@click.option(
|
|
148
|
+
"--baseline-backup",
|
|
149
|
+
help="Specific backup label to use as baseline (optional). If not provided, uses the latest successful full backup.",
|
|
150
|
+
)
|
|
151
|
+
@click.option(
|
|
152
|
+
"--group",
|
|
153
|
+
required=True,
|
|
154
|
+
help="Inventory group to backup from table_inventory. Supports wildcard '*'.",
|
|
155
|
+
)
|
|
156
|
+
@click.option(
|
|
157
|
+
"--name",
|
|
158
|
+
help="Optional logical name (label) for the backup. Supports -v#r placeholder for auto-versioning.",
|
|
159
|
+
)
|
|
122
160
|
def backup_incremental(config, baseline_backup, group, name):
|
|
123
161
|
"""Run incremental backup of partitions changed since the latest full backup.
|
|
124
|
-
|
|
162
|
+
|
|
125
163
|
By default, uses the latest successful full backup as baseline.
|
|
126
164
|
Optionally specify a specific backup label to use as baseline.
|
|
127
|
-
|
|
165
|
+
|
|
128
166
|
Flow: load config → check health → ensure repository → reserve job slot →
|
|
129
167
|
find baseline backup → find recent partitions → generate label → build backup command → execute backup
|
|
130
168
|
"""
|
|
131
169
|
try:
|
|
132
170
|
cfg = config_module.load_config(config)
|
|
133
171
|
config_module.validate_config(cfg)
|
|
134
|
-
|
|
172
|
+
|
|
135
173
|
database = db.StarRocksDB(
|
|
136
|
-
host=cfg[
|
|
137
|
-
port=cfg[
|
|
138
|
-
user=cfg[
|
|
139
|
-
password=os.getenv(
|
|
140
|
-
database=cfg[
|
|
141
|
-
tls_config=cfg.get(
|
|
174
|
+
host=cfg["host"],
|
|
175
|
+
port=cfg["port"],
|
|
176
|
+
user=cfg["user"],
|
|
177
|
+
password=os.getenv("STARROCKS_PASSWORD"),
|
|
178
|
+
database=cfg["database"],
|
|
179
|
+
tls_config=cfg.get("tls"),
|
|
142
180
|
)
|
|
143
|
-
|
|
181
|
+
|
|
144
182
|
with database:
|
|
145
183
|
was_created = schema.ensure_ops_schema(database)
|
|
146
184
|
if was_created:
|
|
147
|
-
logger.warning(
|
|
185
|
+
logger.warning(
|
|
186
|
+
"ops schema was auto-created. Please run 'starrocks-br init' after populating config."
|
|
187
|
+
)
|
|
148
188
|
logger.warning("Remember to populate ops.table_inventory with your backup groups!")
|
|
149
|
-
sys.exit(1)
|
|
150
|
-
|
|
189
|
+
sys.exit(1) # Exit if schema was just created, requires user action
|
|
190
|
+
|
|
151
191
|
healthy, message = health.check_cluster_health(database)
|
|
152
192
|
if not healthy:
|
|
153
193
|
logger.error(f"Cluster health check failed: {message}")
|
|
154
194
|
sys.exit(1)
|
|
155
|
-
|
|
195
|
+
|
|
156
196
|
logger.success(f"Cluster health: {message}")
|
|
157
|
-
|
|
158
|
-
repository.ensure_repository(database, cfg[
|
|
159
|
-
|
|
197
|
+
|
|
198
|
+
repository.ensure_repository(database, cfg["repository"])
|
|
199
|
+
|
|
160
200
|
logger.success(f"Repository '{cfg['repository']}' verified")
|
|
161
|
-
|
|
201
|
+
|
|
162
202
|
label = labels.determine_backup_label(
|
|
163
203
|
db=database,
|
|
164
|
-
backup_type=
|
|
165
|
-
database_name=cfg[
|
|
166
|
-
custom_name=name
|
|
204
|
+
backup_type="incremental",
|
|
205
|
+
database_name=cfg["database"],
|
|
206
|
+
custom_name=name,
|
|
167
207
|
)
|
|
168
|
-
|
|
208
|
+
|
|
169
209
|
logger.success(f"Generated label: {label}")
|
|
170
|
-
|
|
210
|
+
|
|
171
211
|
if baseline_backup:
|
|
172
212
|
logger.success(f"Using specified baseline backup: {baseline_backup}")
|
|
173
213
|
else:
|
|
174
|
-
latest_backup = planner.find_latest_full_backup(database, cfg[
|
|
214
|
+
latest_backup = planner.find_latest_full_backup(database, cfg["database"])
|
|
175
215
|
if latest_backup:
|
|
176
|
-
logger.success(
|
|
216
|
+
logger.success(
|
|
217
|
+
f"Using latest full backup as baseline: {latest_backup['label']} ({latest_backup['backup_type']})"
|
|
218
|
+
)
|
|
177
219
|
else:
|
|
178
|
-
logger.warning(
|
|
179
|
-
|
|
220
|
+
logger.warning(
|
|
221
|
+
"No full backup found - this will be the first incremental backup"
|
|
222
|
+
)
|
|
223
|
+
|
|
180
224
|
partitions = planner.find_recent_partitions(
|
|
181
|
-
database, cfg[
|
|
225
|
+
database, cfg["database"], baseline_backup_label=baseline_backup, group_name=group
|
|
182
226
|
)
|
|
183
|
-
|
|
227
|
+
|
|
184
228
|
if not partitions:
|
|
185
229
|
logger.warning("No partitions found to backup")
|
|
186
230
|
sys.exit(1)
|
|
187
|
-
|
|
231
|
+
|
|
188
232
|
logger.success(f"Found {len(partitions)} partition(s) to backup")
|
|
189
|
-
|
|
233
|
+
|
|
190
234
|
backup_command = planner.build_incremental_backup_command(
|
|
191
|
-
partitions, cfg[
|
|
235
|
+
partitions, cfg["repository"], label, cfg["database"]
|
|
192
236
|
)
|
|
193
|
-
|
|
237
|
+
|
|
238
|
+
concurrency.reserve_job_slot(database, scope="backup", label=label)
|
|
239
|
+
|
|
194
240
|
planner.record_backup_partitions(database, label, partitions)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
logger.success(f"Job slot reserved")
|
|
241
|
+
|
|
242
|
+
logger.success("Job slot reserved")
|
|
199
243
|
logger.info(f"Starting incremental backup for group '{group}'...")
|
|
200
244
|
result = executor.execute_backup(
|
|
201
245
|
database,
|
|
202
246
|
backup_command,
|
|
203
|
-
repository=cfg[
|
|
204
|
-
backup_type=
|
|
205
|
-
scope=
|
|
206
|
-
database=cfg[
|
|
247
|
+
repository=cfg["repository"],
|
|
248
|
+
backup_type="incremental",
|
|
249
|
+
scope="backup",
|
|
250
|
+
database=cfg["database"],
|
|
207
251
|
)
|
|
208
|
-
|
|
209
|
-
if result[
|
|
252
|
+
|
|
253
|
+
if result["success"]:
|
|
210
254
|
logger.success(f"Backup completed successfully: {result['final_status']['state']}")
|
|
211
255
|
sys.exit(0)
|
|
212
256
|
else:
|
|
213
|
-
error_details = result.get(
|
|
214
|
-
if error_details and error_details.get(
|
|
257
|
+
error_details = result.get("error_details")
|
|
258
|
+
if error_details and error_details.get("error_type") == "snapshot_exists":
|
|
215
259
|
_handle_snapshot_exists_error(
|
|
216
|
-
error_details,
|
|
260
|
+
error_details,
|
|
261
|
+
label,
|
|
262
|
+
config,
|
|
263
|
+
cfg["repository"],
|
|
264
|
+
"incremental",
|
|
265
|
+
group,
|
|
266
|
+
baseline_backup,
|
|
217
267
|
)
|
|
218
268
|
sys.exit(1)
|
|
219
|
-
|
|
220
|
-
state = result.get(
|
|
269
|
+
|
|
270
|
+
state = result.get("final_status", {}).get("state", "UNKNOWN")
|
|
221
271
|
if state == "LOST":
|
|
222
272
|
logger.critical("Backup tracking lost!")
|
|
223
273
|
logger.warning("Another backup operation started during ours.")
|
|
224
274
|
logger.tip("Enable ops.run_status concurrency checks to prevent this.")
|
|
225
275
|
logger.error(f"{result['error_message']}")
|
|
226
276
|
sys.exit(1)
|
|
227
|
-
|
|
277
|
+
|
|
228
278
|
except FileNotFoundError as e:
|
|
229
279
|
logger.error(f"Config file not found: {e}")
|
|
230
280
|
sys.exit(1)
|
|
@@ -239,100 +289,111 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
239
289
|
sys.exit(1)
|
|
240
290
|
|
|
241
291
|
|
|
242
|
-
@backup.command(
|
|
243
|
-
@click.option(
|
|
244
|
-
@click.option(
|
|
245
|
-
|
|
292
|
+
@backup.command("full")
|
|
293
|
+
@click.option("--config", required=True, help="Path to config YAML file")
|
|
294
|
+
@click.option(
|
|
295
|
+
"--group",
|
|
296
|
+
required=True,
|
|
297
|
+
help="Inventory group to backup from table_inventory. Supports wildcard '*'.",
|
|
298
|
+
)
|
|
299
|
+
@click.option(
|
|
300
|
+
"--name",
|
|
301
|
+
help="Optional logical name (label) for the backup. Supports -v#r placeholder for auto-versioning.",
|
|
302
|
+
)
|
|
246
303
|
def backup_full(config, group, name):
|
|
247
304
|
"""Run a full backup for a specified inventory group.
|
|
248
|
-
|
|
305
|
+
|
|
249
306
|
Flow: load config → check health → ensure repository → reserve job slot →
|
|
250
307
|
find tables by group → generate label → build backup command → execute backup
|
|
251
308
|
"""
|
|
252
309
|
try:
|
|
253
310
|
cfg = config_module.load_config(config)
|
|
254
311
|
config_module.validate_config(cfg)
|
|
255
|
-
|
|
312
|
+
|
|
256
313
|
database = db.StarRocksDB(
|
|
257
|
-
host=cfg[
|
|
258
|
-
port=cfg[
|
|
259
|
-
user=cfg[
|
|
260
|
-
password=os.getenv(
|
|
261
|
-
database=cfg[
|
|
262
|
-
tls_config=cfg.get(
|
|
314
|
+
host=cfg["host"],
|
|
315
|
+
port=cfg["port"],
|
|
316
|
+
user=cfg["user"],
|
|
317
|
+
password=os.getenv("STARROCKS_PASSWORD"),
|
|
318
|
+
database=cfg["database"],
|
|
319
|
+
tls_config=cfg.get("tls"),
|
|
263
320
|
)
|
|
264
|
-
|
|
321
|
+
|
|
265
322
|
with database:
|
|
266
323
|
was_created = schema.ensure_ops_schema(database)
|
|
267
324
|
if was_created:
|
|
268
|
-
logger.warning(
|
|
325
|
+
logger.warning(
|
|
326
|
+
"ops schema was auto-created. Please run 'starrocks-br init' after populating config."
|
|
327
|
+
)
|
|
269
328
|
logger.warning("Remember to populate ops.table_inventory with your backup groups!")
|
|
270
|
-
sys.exit(1)
|
|
271
|
-
|
|
329
|
+
sys.exit(1) # Exit if schema was just created, requires user action
|
|
330
|
+
|
|
272
331
|
healthy, message = health.check_cluster_health(database)
|
|
273
332
|
if not healthy:
|
|
274
333
|
logger.error(f"Cluster health check failed: {message}")
|
|
275
334
|
sys.exit(1)
|
|
276
|
-
|
|
335
|
+
|
|
277
336
|
logger.success(f"Cluster health: {message}")
|
|
278
|
-
|
|
279
|
-
repository.ensure_repository(database, cfg[
|
|
280
|
-
|
|
337
|
+
|
|
338
|
+
repository.ensure_repository(database, cfg["repository"])
|
|
339
|
+
|
|
281
340
|
logger.success(f"Repository '{cfg['repository']}' verified")
|
|
282
|
-
|
|
341
|
+
|
|
283
342
|
label = labels.determine_backup_label(
|
|
284
|
-
db=database,
|
|
285
|
-
backup_type='full',
|
|
286
|
-
database_name=cfg['database'],
|
|
287
|
-
custom_name=name
|
|
343
|
+
db=database, backup_type="full", database_name=cfg["database"], custom_name=name
|
|
288
344
|
)
|
|
289
|
-
|
|
345
|
+
|
|
290
346
|
logger.success(f"Generated label: {label}")
|
|
291
|
-
|
|
347
|
+
|
|
292
348
|
backup_command = planner.build_full_backup_command(
|
|
293
|
-
database, group, cfg[
|
|
349
|
+
database, group, cfg["repository"], label, cfg["database"]
|
|
294
350
|
)
|
|
295
|
-
|
|
351
|
+
|
|
296
352
|
if not backup_command:
|
|
297
|
-
logger.warning(
|
|
353
|
+
logger.warning(
|
|
354
|
+
f"No tables found in group '{group}' for database '{cfg['database']}' to backup"
|
|
355
|
+
)
|
|
298
356
|
sys.exit(1)
|
|
299
|
-
|
|
357
|
+
|
|
300
358
|
tables = planner.find_tables_by_group(database, group)
|
|
301
|
-
all_partitions = planner.get_all_partitions_for_tables(
|
|
359
|
+
all_partitions = planner.get_all_partitions_for_tables(
|
|
360
|
+
database, cfg["database"], tables
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
concurrency.reserve_job_slot(database, scope="backup", label=label)
|
|
364
|
+
|
|
302
365
|
planner.record_backup_partitions(database, label, all_partitions)
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
logger.success(f"Job slot reserved")
|
|
366
|
+
|
|
367
|
+
logger.success("Job slot reserved")
|
|
307
368
|
logger.info(f"Starting full backup for group '{group}'...")
|
|
308
369
|
result = executor.execute_backup(
|
|
309
370
|
database,
|
|
310
371
|
backup_command,
|
|
311
|
-
repository=cfg[
|
|
312
|
-
backup_type=
|
|
313
|
-
scope=
|
|
314
|
-
database=cfg[
|
|
372
|
+
repository=cfg["repository"],
|
|
373
|
+
backup_type="full",
|
|
374
|
+
scope="backup",
|
|
375
|
+
database=cfg["database"],
|
|
315
376
|
)
|
|
316
|
-
|
|
317
|
-
if result[
|
|
377
|
+
|
|
378
|
+
if result["success"]:
|
|
318
379
|
logger.success(f"Backup completed successfully: {result['final_status']['state']}")
|
|
319
380
|
sys.exit(0)
|
|
320
381
|
else:
|
|
321
|
-
error_details = result.get(
|
|
322
|
-
if error_details and error_details.get(
|
|
382
|
+
error_details = result.get("error_details")
|
|
383
|
+
if error_details and error_details.get("error_type") == "snapshot_exists":
|
|
323
384
|
_handle_snapshot_exists_error(
|
|
324
|
-
error_details, label, config, cfg[
|
|
385
|
+
error_details, label, config, cfg["repository"], "full", group
|
|
325
386
|
)
|
|
326
387
|
sys.exit(1)
|
|
327
|
-
|
|
328
|
-
state = result.get(
|
|
388
|
+
|
|
389
|
+
state = result.get("final_status", {}).get("state", "UNKNOWN")
|
|
329
390
|
if state == "LOST":
|
|
330
391
|
logger.critical("Backup tracking lost!")
|
|
331
392
|
logger.warning("Another backup operation started during ours.")
|
|
332
393
|
logger.tip("Enable ops.run_status concurrency checks to prevent this.")
|
|
333
394
|
logger.error(f"{result['error_message']}")
|
|
334
395
|
sys.exit(1)
|
|
335
|
-
|
|
396
|
+
|
|
336
397
|
except (FileNotFoundError, ValueError, RuntimeError, Exception) as e:
|
|
337
398
|
if isinstance(e, FileNotFoundError):
|
|
338
399
|
logger.error(f"Config file not found: {e}")
|
|
@@ -345,128 +406,158 @@ def backup_full(config, group, name):
|
|
|
345
406
|
sys.exit(1)
|
|
346
407
|
|
|
347
408
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
@
|
|
351
|
-
@click.option(
|
|
352
|
-
@click.option(
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
@click.option(
|
|
409
|
+
@cli.command("restore")
|
|
410
|
+
@click.option("--config", required=True, help="Path to config YAML file")
|
|
411
|
+
@click.option("--target-label", required=True, help="Backup label to restore to")
|
|
412
|
+
@click.option("--group", help="Optional inventory group to filter tables to restore")
|
|
413
|
+
@click.option(
|
|
414
|
+
"--table",
|
|
415
|
+
help="Optional table name to restore (table name only, database comes from config). Cannot be used with --group.",
|
|
416
|
+
)
|
|
417
|
+
@click.option(
|
|
418
|
+
"--rename-suffix",
|
|
419
|
+
default="_restored",
|
|
420
|
+
help="Suffix for temporary tables during restore (default: _restored)",
|
|
421
|
+
)
|
|
422
|
+
@click.option("--yes", is_flag=True, help="Skip confirmation prompt and proceed automatically")
|
|
357
423
|
def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
358
424
|
"""Restore data to a specific point in time using intelligent backup chain resolution.
|
|
359
|
-
|
|
425
|
+
|
|
360
426
|
This command automatically determines the correct sequence of backups needed for restore:
|
|
361
427
|
- For full backups: restores directly from the target backup
|
|
362
428
|
- For incremental backups: restores the base full backup first, then applies the incremental
|
|
363
|
-
|
|
429
|
+
|
|
364
430
|
The restore process uses temporary tables with the specified suffix for safety, then performs
|
|
365
431
|
an atomic rename to make the restored data live.
|
|
366
|
-
|
|
432
|
+
|
|
367
433
|
Flow: load config → check health → ensure repository → find restore pair → get tables from backup → execute restore flow
|
|
368
434
|
"""
|
|
369
435
|
try:
|
|
370
436
|
if group and table:
|
|
371
|
-
logger.error(
|
|
437
|
+
logger.error(
|
|
438
|
+
"Cannot specify both --group and --table. Use --table for single table restore or --group for inventory group restore."
|
|
439
|
+
)
|
|
372
440
|
sys.exit(1)
|
|
373
|
-
|
|
441
|
+
|
|
374
442
|
if table:
|
|
375
443
|
table = table.strip()
|
|
376
444
|
if not table:
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
445
|
+
raise exceptions.InvalidTableNameError("", "Table name cannot be empty")
|
|
446
|
+
|
|
447
|
+
if "." in table:
|
|
448
|
+
raise exceptions.InvalidTableNameError(
|
|
449
|
+
table,
|
|
450
|
+
"Table name must not include database prefix. Use 'table_name' not 'database.table_name'",
|
|
451
|
+
)
|
|
452
|
+
|
|
384
453
|
cfg = config_module.load_config(config)
|
|
385
454
|
config_module.validate_config(cfg)
|
|
386
|
-
|
|
455
|
+
|
|
387
456
|
database = db.StarRocksDB(
|
|
388
|
-
host=cfg[
|
|
389
|
-
port=cfg[
|
|
390
|
-
user=cfg[
|
|
391
|
-
password=os.getenv(
|
|
392
|
-
database=cfg[
|
|
393
|
-
tls_config=cfg.get(
|
|
457
|
+
host=cfg["host"],
|
|
458
|
+
port=cfg["port"],
|
|
459
|
+
user=cfg["user"],
|
|
460
|
+
password=os.getenv("STARROCKS_PASSWORD"),
|
|
461
|
+
database=cfg["database"],
|
|
462
|
+
tls_config=cfg.get("tls"),
|
|
394
463
|
)
|
|
395
|
-
|
|
464
|
+
|
|
396
465
|
with database:
|
|
397
466
|
was_created = schema.ensure_ops_schema(database)
|
|
398
467
|
if was_created:
|
|
399
|
-
logger.warning(
|
|
468
|
+
logger.warning(
|
|
469
|
+
"ops schema was auto-created. Please run 'starrocks-br init' after populating config."
|
|
470
|
+
)
|
|
400
471
|
logger.warning("Remember to populate ops.table_inventory with your backup groups!")
|
|
401
|
-
sys.exit(1)
|
|
402
|
-
|
|
472
|
+
sys.exit(1) # Exit if schema was just created, requires user action
|
|
473
|
+
|
|
403
474
|
healthy, message = health.check_cluster_health(database)
|
|
404
475
|
if not healthy:
|
|
405
476
|
logger.error(f"Cluster health check failed: {message}")
|
|
406
477
|
sys.exit(1)
|
|
407
|
-
|
|
478
|
+
|
|
408
479
|
logger.success(f"Cluster health: {message}")
|
|
409
|
-
|
|
410
|
-
repository.ensure_repository(database, cfg[
|
|
411
|
-
|
|
480
|
+
|
|
481
|
+
repository.ensure_repository(database, cfg["repository"])
|
|
482
|
+
|
|
412
483
|
logger.success(f"Repository '{cfg['repository']}' verified")
|
|
413
|
-
|
|
484
|
+
|
|
414
485
|
logger.info(f"Finding restore sequence for target backup: {target_label}")
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
except ValueError as e:
|
|
420
|
-
logger.error(f"Failed to find restore sequence: {e}")
|
|
421
|
-
sys.exit(1)
|
|
422
|
-
|
|
486
|
+
|
|
487
|
+
restore_pair = restore.find_restore_pair(database, target_label)
|
|
488
|
+
logger.success(f"Found restore sequence: {' -> '.join(restore_pair)}")
|
|
489
|
+
|
|
423
490
|
logger.info("Determining tables to restore from backup manifest...")
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
except ValueError as e:
|
|
434
|
-
logger.error(str(e))
|
|
435
|
-
sys.exit(1)
|
|
436
|
-
|
|
491
|
+
|
|
492
|
+
tables_to_restore = restore.get_tables_from_backup(
|
|
493
|
+
database,
|
|
494
|
+
target_label,
|
|
495
|
+
group=group,
|
|
496
|
+
table=table,
|
|
497
|
+
database=cfg["database"] if table else None,
|
|
498
|
+
)
|
|
499
|
+
|
|
437
500
|
if not tables_to_restore:
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
sys.exit(1)
|
|
445
|
-
|
|
446
|
-
logger.success(f"Found {len(tables_to_restore)} table(s) to restore: {', '.join(tables_to_restore)}")
|
|
447
|
-
|
|
501
|
+
raise exceptions.NoTablesFoundError(group=group, label=target_label)
|
|
502
|
+
|
|
503
|
+
logger.success(
|
|
504
|
+
f"Found {len(tables_to_restore)} table(s) to restore: {', '.join(tables_to_restore)}"
|
|
505
|
+
)
|
|
506
|
+
|
|
448
507
|
logger.info("Starting restore flow...")
|
|
449
508
|
result = restore.execute_restore_flow(
|
|
450
509
|
database,
|
|
451
|
-
cfg[
|
|
510
|
+
cfg["repository"],
|
|
452
511
|
restore_pair,
|
|
453
512
|
tables_to_restore,
|
|
454
513
|
rename_suffix,
|
|
455
|
-
skip_confirmation=yes
|
|
514
|
+
skip_confirmation=yes,
|
|
456
515
|
)
|
|
457
|
-
|
|
458
|
-
if result[
|
|
459
|
-
logger.success(result[
|
|
516
|
+
|
|
517
|
+
if result["success"]:
|
|
518
|
+
logger.success(result["message"])
|
|
460
519
|
sys.exit(0)
|
|
461
520
|
else:
|
|
462
521
|
logger.error(f"Restore failed: {result['error_message']}")
|
|
463
522
|
sys.exit(1)
|
|
464
|
-
|
|
523
|
+
|
|
524
|
+
except exceptions.InvalidTableNameError as e:
|
|
525
|
+
error_handler.handle_invalid_table_name_error(e)
|
|
526
|
+
sys.exit(1)
|
|
527
|
+
except exceptions.BackupLabelNotFoundError as e:
|
|
528
|
+
error_handler.handle_backup_label_not_found_error(e, config)
|
|
529
|
+
sys.exit(1)
|
|
530
|
+
except exceptions.NoSuccessfulFullBackupFoundError as e:
|
|
531
|
+
error_handler.handle_no_successful_full_backup_found_error(e, config)
|
|
532
|
+
sys.exit(1)
|
|
533
|
+
except exceptions.TableNotFoundInBackupError as e:
|
|
534
|
+
error_handler.handle_table_not_found_in_backup_error(e, config)
|
|
535
|
+
sys.exit(1)
|
|
536
|
+
except exceptions.NoTablesFoundError as e:
|
|
537
|
+
error_handler.handle_no_tables_found_error(e, config, target_label)
|
|
538
|
+
sys.exit(1)
|
|
539
|
+
except exceptions.SnapshotNotFoundError as e:
|
|
540
|
+
error_handler.handle_snapshot_not_found_error(e, config)
|
|
541
|
+
sys.exit(1)
|
|
542
|
+
except exceptions.RestoreOperationCancelledError:
|
|
543
|
+
error_handler.handle_restore_operation_cancelled_error()
|
|
544
|
+
sys.exit(1)
|
|
545
|
+
except exceptions.ConfigFileNotFoundError as e:
|
|
546
|
+
error_handler.handle_config_file_not_found_error(e)
|
|
547
|
+
sys.exit(1)
|
|
548
|
+
except exceptions.ConfigValidationError as e:
|
|
549
|
+
error_handler.handle_config_validation_error(e, config)
|
|
550
|
+
sys.exit(1)
|
|
551
|
+
except exceptions.ClusterHealthCheckFailedError as e:
|
|
552
|
+
error_handler.handle_cluster_health_check_failed_error(e, config)
|
|
553
|
+
sys.exit(1)
|
|
465
554
|
except FileNotFoundError as e:
|
|
466
|
-
|
|
555
|
+
error_handler.handle_config_file_not_found_error(exceptions.ConfigFileNotFoundError(str(e)))
|
|
467
556
|
sys.exit(1)
|
|
468
557
|
except ValueError as e:
|
|
469
|
-
|
|
558
|
+
error_handler.handle_config_validation_error(
|
|
559
|
+
exceptions.ConfigValidationError(str(e)), config
|
|
560
|
+
)
|
|
470
561
|
sys.exit(1)
|
|
471
562
|
except RuntimeError as e:
|
|
472
563
|
logger.error(f"{e}")
|
|
@@ -476,6 +567,5 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
|
476
567
|
sys.exit(1)
|
|
477
568
|
|
|
478
569
|
|
|
479
|
-
if __name__ ==
|
|
570
|
+
if __name__ == "__main__":
|
|
480
571
|
cli()
|
|
481
|
-
|