starrocks-br 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/__init__.py +1 -0
- starrocks_br/cli.py +385 -0
- starrocks_br/concurrency.py +177 -0
- starrocks_br/config.py +41 -0
- starrocks_br/db.py +88 -0
- starrocks_br/executor.py +245 -0
- starrocks_br/health.py +34 -0
- starrocks_br/history.py +93 -0
- starrocks_br/labels.py +52 -0
- starrocks_br/logger.py +36 -0
- starrocks_br/planner.py +280 -0
- starrocks_br/repository.py +36 -0
- starrocks_br/restore.py +493 -0
- starrocks_br/schema.py +144 -0
- starrocks_br-0.1.0.dist-info/METADATA +12 -0
- starrocks_br-0.1.0.dist-info/RECORD +19 -0
- starrocks_br-0.1.0.dist-info/WHEEL +5 -0
- starrocks_br-0.1.0.dist-info/entry_points.txt +2 -0
- starrocks_br-0.1.0.dist-info/top_level.txt +1 -0
starrocks_br/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__all__ = ["cli", "config"]
|
starrocks_br/cli.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
import click
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from . import config as config_module
|
|
6
|
+
from . import db
|
|
7
|
+
from . import health
|
|
8
|
+
from . import repository
|
|
9
|
+
from . import concurrency
|
|
10
|
+
from . import planner
|
|
11
|
+
from . import labels
|
|
12
|
+
from . import executor
|
|
13
|
+
from . import restore
|
|
14
|
+
from . import schema
|
|
15
|
+
from . import logger
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@click.group()
def cli():
    """Top-level command group for the StarRocks backup & restore automation tool."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@cli.command('init')
@click.option('--config', required=True, help='Path to config YAML file')
def init(config):
    """Initialize ops database and control tables.

    Creates the ops database with required tables:
    - ops.table_inventory: Inventory groups mapping to databases/tables
    - ops.backup_history: Backup operation history
    - ops.restore_history: Restore operation history
    - ops.run_status: Job concurrency control

    Run this once before using backup/restore commands.

    Exits with status 1 on any configuration or schema-creation failure.
    """
    try:
        cfg = config_module.load_config(config)
        config_module.validate_config(cfg)

        # Password is deliberately read from the environment rather than the
        # config file, so credentials never live alongside the YAML config.
        database = db.StarRocksDB(
            host=cfg['host'],
            port=cfg['port'],
            user=cfg['user'],
            password=os.getenv('STARROCKS_PASSWORD'),
            database=cfg['database']
        )

        with database:
            logger.info("Initializing ops schema...")
            schema.initialize_ops_schema(database)
            # Guide the operator through the manual follow-up steps: the tool
            # cannot know which databases/tables belong in the inventory.
            logger.info("")
            logger.info("Next steps:")
            logger.info("1. Insert your table inventory records:")
            logger.info("   INSERT INTO ops.table_inventory")
            logger.info("   (inventory_group, database_name, table_name)")
            logger.info("   VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
            logger.info("   VALUES ('my_full_database_backup', 'your_db', '*');")
            logger.info("   VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
            logger.info("   VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
            logger.info("")
            logger.info("2. Run your first backup:")
            logger.info("   starrocks-br backup incremental --group my_daily_incremental --config config.yaml")

    except FileNotFoundError as e:
        logger.error(f"Config file not found: {e}")
        sys.exit(1)
    except ValueError as e:
        logger.error(f"Configuration error: {e}")
        sys.exit(1)
    except Exception as e:
        # Catch-all boundary: report and exit non-zero so schedulers notice.
        logger.error(f"Failed to initialize schema: {e}")
        sys.exit(1)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@cli.group()
def backup():
    """Command group containing the backup subcommands (full, incremental)."""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@backup.command('incremental')
@click.option('--config', required=True, help='Path to config YAML file')
@click.option('--baseline-backup', help='Specific backup label to use as baseline (optional). If not provided, uses the latest successful full backup.')
@click.option('--group', required=True, help='Inventory group to backup from table_inventory. Supports wildcard \'*\'.')
@click.option('--name', help='Optional logical name (label) for the backup. Supports -v#r placeholder for auto-versioning.')
def backup_incremental(config, baseline_backup, group, name):
    """Run incremental backup of partitions changed since the latest full backup.

    By default, uses the latest successful full backup as baseline.
    Optionally specify a specific backup label to use as baseline.

    Flow: load config → check health → ensure repository → generate label →
    find baseline backup → find recent partitions → build backup command →
    record partitions → reserve job slot → execute backup
    """
    try:
        cfg = config_module.load_config(config)
        config_module.validate_config(cfg)

        # Password comes from the environment only (never from config YAML).
        database = db.StarRocksDB(
            host=cfg['host'],
            port=cfg['port'],
            user=cfg['user'],
            password=os.getenv('STARROCKS_PASSWORD'),
            database=cfg['database']
        )

        with database:
            # Auto-create the ops schema if missing, but refuse to continue:
            # a freshly created inventory is empty and must be populated first.
            was_created = schema.ensure_ops_schema(database)
            if was_created:
                logger.warning("ops schema was auto-created. Please run 'starrocks-br init' after populating config.")
                logger.warning("Remember to populate ops.table_inventory with your backup groups!")
                sys.exit(1)  # Exit if schema was just created, requires user action

            healthy, message = health.check_cluster_health(database)
            if not healthy:
                logger.error(f"Cluster health check failed: {message}")
                sys.exit(1)

            logger.success(f"Cluster health: {message}")

            repository.ensure_repository(database, cfg['repository'])

            logger.success(f"Repository '{cfg['repository']}' verified")

            label = labels.determine_backup_label(
                db=database,
                backup_type='incremental',
                database_name=cfg['database'],
                custom_name=name
            )

            logger.success(f"Generated label: {label}")

            # Baseline selection: explicit --baseline-backup wins; otherwise the
            # latest successful full backup is used. A missing baseline is only
            # a warning — the very first incremental has nothing to diff against.
            if baseline_backup:
                logger.success(f"Using specified baseline backup: {baseline_backup}")
            else:
                latest_backup = planner.find_latest_full_backup(database, cfg['database'])
                if latest_backup:
                    logger.success(f"Using latest full backup as baseline: {latest_backup['label']} ({latest_backup['backup_type']})")
                else:
                    logger.warning("No full backup found - this will be the first incremental backup")

            partitions = planner.find_recent_partitions(
                database, cfg['database'], baseline_backup_label=baseline_backup, group_name=group
            )

            if not partitions:
                logger.warning("No partitions found to backup")
                sys.exit(1)

            logger.success(f"Found {len(partitions)} partition(s) to backup")

            backup_command = planner.build_incremental_backup_command(
                partitions, cfg['repository'], label, cfg['database']
            )

            # NOTE(review): partitions are recorded before the job slot is
            # reserved, so a concurrency conflict below leaves these rows
            # behind — confirm whether that is intentional.
            planner.record_backup_partitions(database, label, partitions)

            # Raises RuntimeError if another job holds the 'backup' scope.
            concurrency.reserve_job_slot(database, scope='backup', label=label)

            logger.success(f"Job slot reserved")
            logger.info(f"Starting incremental backup for group '{group}'...")
            result = executor.execute_backup(
                database,
                backup_command,
                repository=cfg['repository'],
                backup_type='incremental',
                scope='backup',
                database=cfg['database']
            )

            if result['success']:
                logger.success(f"Backup completed successfully: {result['final_status']['state']}")
                sys.exit(0)
            else:
                # LOST means SHOW BACKUP stopped reporting our label mid-run,
                # i.e. another backup replaced ours in the single job slot.
                state = result.get('final_status', {}).get('state', 'UNKNOWN')
                if state == "LOST":
                    logger.critical("Backup tracking lost!")
                    logger.warning("Another backup operation started during ours.")
                    logger.tip("Enable ops.run_status concurrency checks to prevent this.")
                logger.error(f"{result['error_message']}")
                sys.exit(1)

    except FileNotFoundError as e:
        logger.error(f"Config file not found: {e}")
        sys.exit(1)
    except ValueError as e:
        logger.error(f"Configuration error: {e}")
        sys.exit(1)
    except RuntimeError as e:
        # Concurrency conflicts surface as RuntimeError with a ready-made message.
        logger.error(f"{e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@backup.command('full')
@click.option('--config', required=True, help='Path to config YAML file')
@click.option('--group', required=True, help='Inventory group to backup from table_inventory. Supports wildcard \'*\'.')
@click.option('--name', help='Optional logical name (label) for the backup. Supports -v#r placeholder for auto-versioning.')
def backup_full(config, group, name):
    """Run a full backup for a specified inventory group.

    Flow: load config → check health → ensure repository → generate label →
    build backup command → record partitions → reserve job slot → execute backup

    Exits 0 on success, 1 on any failure.
    """
    try:
        cfg = config_module.load_config(config)
        config_module.validate_config(cfg)

        # Password is read from the environment only, never from config YAML.
        database = db.StarRocksDB(
            host=cfg['host'],
            port=cfg['port'],
            user=cfg['user'],
            password=os.getenv('STARROCKS_PASSWORD'),
            database=cfg['database']
        )

        with database:
            # Auto-create the ops schema if missing, but stop: a fresh
            # table_inventory is empty and needs user-entered groups first.
            was_created = schema.ensure_ops_schema(database)
            if was_created:
                logger.warning("ops schema was auto-created. Please run 'starrocks-br init' after populating config.")
                logger.warning("Remember to populate ops.table_inventory with your backup groups!")
                sys.exit(1)  # Exit if schema was just created, requires user action

            healthy, message = health.check_cluster_health(database)
            if not healthy:
                logger.error(f"Cluster health check failed: {message}")
                sys.exit(1)

            logger.success(f"Cluster health: {message}")

            repository.ensure_repository(database, cfg['repository'])

            logger.success(f"Repository '{cfg['repository']}' verified")

            label = labels.determine_backup_label(
                db=database,
                backup_type='full',
                database_name=cfg['database'],
                custom_name=name
            )

            logger.success(f"Generated label: {label}")

            # An empty command means the group resolved to no tables.
            backup_command = planner.build_full_backup_command(
                database, group, cfg['repository'], label, cfg['database']
            )

            if not backup_command:
                logger.warning(f"No tables found in group '{group}' for database '{cfg['database']}' to backup")
                sys.exit(1)

            # Record every partition covered by this full backup so later
            # incrementals can diff against it.
            tables = planner.find_tables_by_group(database, group)
            all_partitions = planner.get_all_partitions_for_tables(database, cfg['database'], tables)
            planner.record_backup_partitions(database, label, all_partitions)

            # Raises RuntimeError if another job already holds the 'backup' scope.
            concurrency.reserve_job_slot(database, scope='backup', label=label)

            logger.success(f"Job slot reserved")
            logger.info(f"Starting full backup for group '{group}'...")
            result = executor.execute_backup(
                database,
                backup_command,
                repository=cfg['repository'],
                backup_type='full',
                scope='backup',
                database=cfg['database']
            )

            if result['success']:
                logger.success(f"Backup completed successfully: {result['final_status']['state']}")
                sys.exit(0)
            else:
                # LOST means SHOW BACKUP stopped reporting our label mid-run:
                # another backup job displaced ours.
                state = result.get('final_status', {}).get('state', 'UNKNOWN')
                if state == "LOST":
                    logger.critical("Backup tracking lost!")
                    logger.warning("Another backup operation started during ours.")
                    logger.tip("Enable ops.run_status concurrency checks to prevent this.")
                logger.error(f"{result['error_message']}")
                sys.exit(1)

    # Separate handlers (instead of one combined tuple-with-Exception clause,
    # which made the narrower types unreachable as distinct cases) keep this
    # command consistent with backup_incremental's error handling.
    except FileNotFoundError as e:
        logger.error(f"Config file not found: {e}")
        sys.exit(1)
    except ValueError as e:
        logger.error(f"Configuration error: {e}")
        sys.exit(1)
    except RuntimeError as e:
        logger.error(f"{e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@cli.command('restore')
@click.option('--config', required=True, help='Path to config YAML file')
@click.option('--target-label', required=True, help='Backup label to restore to')
@click.option('--group', help='Optional inventory group to filter tables to restore')
@click.option('--rename-suffix', default='_restored', help='Suffix for temporary tables during restore (default: _restored)')
def restore_command(config, target_label, group, rename_suffix):
    """Restore data to a specific point in time using intelligent backup chain resolution.

    This command automatically determines the correct sequence of backups needed for restore:
    - For full backups: restores directly from the target backup
    - For incremental backups: restores the base full backup first, then applies the incremental

    The restore process uses temporary tables with the specified suffix for safety, then performs
    an atomic rename to make the restored data live.

    Flow: load config → find restore pair → get tables from backup → execute restore flow
    """
    try:
        cfg = config_module.load_config(config)
        config_module.validate_config(cfg)

        # Password comes from the environment only (never from config YAML).
        database = db.StarRocksDB(
            host=cfg['host'],
            port=cfg['port'],
            user=cfg['user'],
            password=os.getenv('STARROCKS_PASSWORD'),
            database=cfg['database']
        )

        with database:
            # A freshly auto-created ops schema has no backup history to
            # restore from, so bail out and ask the user to initialize first.
            was_created = schema.ensure_ops_schema(database)
            if was_created:
                logger.warning("ops schema was auto-created. Please run 'starrocks-br init' after populating config.")
                logger.warning("Remember to populate ops.table_inventory with your backup groups!")
                sys.exit(1)  # Exit if schema was just created, requires user action

            logger.info(f"Finding restore sequence for target backup: {target_label}")

            # Inner try: an unknown/broken chain is a user-input problem and
            # gets its own message, distinct from the outer config ValueError.
            try:
                restore_pair = restore.find_restore_pair(database, target_label)
                logger.success(f"Found restore sequence: {' -> '.join(restore_pair)}")
            except ValueError as e:
                logger.error(f"Failed to find restore sequence: {e}")
                sys.exit(1)

            logger.info("Determining tables to restore from backup manifest...")
            tables_to_restore = restore.get_tables_from_backup(database, target_label, group)

            if not tables_to_restore:
                if group:
                    logger.warning(f"No tables found in backup '{target_label}' for group '{group}'")
                else:
                    logger.warning(f"No tables found in backup '{target_label}'")
                sys.exit(1)

            logger.success(f"Found {len(tables_to_restore)} table(s) to restore: {', '.join(tables_to_restore)}")

            logger.info("Starting restore flow...")
            result = restore.execute_restore_flow(
                database,
                cfg['repository'],
                restore_pair,
                tables_to_restore,
                rename_suffix
            )

            if result['success']:
                logger.success(result['message'])
                sys.exit(0)
            else:
                logger.error(f"Restore failed: {result['error_message']}")
                sys.exit(1)

    except FileNotFoundError as e:
        logger.error(f"Config file not found: {e}")
        sys.exit(1)
    except ValueError as e:
        logger.error(f"Configuration error: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        sys.exit(1)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
# Allow running the CLI directly (python -m / script execution) in addition
# to the console-script entry point declared in entry_points.txt.
if __name__ == '__main__':
    cli()
|
|
385
|
+
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from typing import Literal, List, Tuple
|
|
2
|
+
from . import logger
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def reserve_job_slot(db, scope: str, label: str) -> None:
    """Reserve a job slot in ops.run_status so jobs of one scope never overlap.

    Any existing row with state='ACTIVE' for the same scope counts as a
    conflict, but stale backup locks are healed automatically before the
    reservation is refused.
    """
    conflicting = _get_active_jobs_for_scope(db, scope)

    if conflicting:
        # Either cleans up every stale job or raises RuntimeError.
        _handle_active_job_conflicts(db, scope, conflicting)

    _insert_new_job(db, scope, label)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_active_jobs_for_scope(db, scope: str) -> List[Tuple[str, str, str]]:
|
|
23
|
+
"""Get all active jobs for the given scope."""
|
|
24
|
+
rows = db.query("SELECT scope, label, state FROM ops.run_status WHERE state = 'ACTIVE'")
|
|
25
|
+
return [row for row in rows if row[0] == scope]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _handle_active_job_conflicts(db, scope: str, active_jobs: List[Tuple[str, str, str]]) -> None:
    """Resolve conflicts with ACTIVE jobs, healing stale ones where possible.

    Raises RuntimeError as soon as one job cannot be healed.
    """
    for job_scope, job_label, _state in active_jobs:
        if not _can_heal_stale_job(job_scope, job_label, db):
            _raise_concurrency_conflict(scope, active_jobs)
        _cleanup_stale_job(db, job_scope, job_label)
        logger.success(f"Cleaned up stale backup job: {job_label}")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _can_heal_stale_job(scope: str, label: str, db) -> bool:
|
|
39
|
+
"""Check if a stale job can be healed (only for backup jobs)."""
|
|
40
|
+
if scope != 'backup':
|
|
41
|
+
return False
|
|
42
|
+
|
|
43
|
+
return _is_backup_job_stale(db, label)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _raise_concurrency_conflict(scope: str, active_jobs: List[Tuple[str, str, str]]) -> None:
|
|
47
|
+
"""Raise a concurrency conflict error with helpful message."""
|
|
48
|
+
active_job_strings = [f"{job[0]}:{job[1]}" for job in active_jobs]
|
|
49
|
+
active_labels = [job[1] for job in active_jobs]
|
|
50
|
+
|
|
51
|
+
raise RuntimeError(
|
|
52
|
+
f"Concurrency conflict: Another '{scope}' job is already ACTIVE: {', '.join(active_job_strings)}. "
|
|
53
|
+
f"Wait for it to complete or cancel it via: UPDATE ops.run_status SET state='CANCELLED' "
|
|
54
|
+
f"WHERE label='{active_labels[0]}' AND state='ACTIVE'"
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _insert_new_job(db, scope: str, label: str) -> None:
|
|
59
|
+
"""Insert a new active job record."""
|
|
60
|
+
sql = (
|
|
61
|
+
"INSERT INTO ops.run_status (scope, label, state, started_at) "
|
|
62
|
+
"VALUES ('%s','%s','ACTIVE', NOW())" % (scope, label)
|
|
63
|
+
)
|
|
64
|
+
db.execute(sql)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _is_backup_job_stale(db, label: str) -> bool:
    """Check if a backup job is stale by scanning SHOW BACKUP across databases.

    Returns True if the job is stale (not actually running anywhere) and
    False if it is still active. On query errors we conservatively report
    False so the lock is not stolen from a possibly-live job.
    """
    try:
        for database_name in _get_user_databases(db):
            status = _check_backup_job_in_database(db, database_name, label)

            if status == "active":
                return False
            if status == "stale":
                return True
            # status is None: label not visible in this database, keep scanning.

        # Label not found in any database: the job is gone, treat as stale.
        return True

    except Exception as e:
        logger.error(f"Error checking backup job status: {e}")
        return False
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _get_user_databases(db) -> List[str]:
    """Return the names of all non-system databases on the cluster.

    The internal 'ops' database is excluded along with the MySQL-style
    system schemas.
    """
    excluded = {'information_schema', 'mysql', 'sys', 'ops'}

    result = []
    for db_row in db.query("SHOW DATABASES"):
        name = _extract_database_name(db_row)
        if name not in excluded:
            result.append(name)
    return result
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _extract_database_name(db_row) -> str:
|
|
106
|
+
"""Extract database name from database query result."""
|
|
107
|
+
if isinstance(db_row, (list, tuple)):
|
|
108
|
+
return db_row[0]
|
|
109
|
+
return db_row.get('Database', '')
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _check_backup_job_in_database(db, database_name: str, label: str) -> str:
    """Probe SHOW BACKUP in one database for the given snapshot label.

    Returns:
        'active' if the matching job is still running
        'stale' if the matching job reached a terminal state
        None if no matching job is visible in this database

    Query errors are deliberately treated as not-found: one unreadable
    database must not abort the whole cluster scan.
    """
    terminal_states = ("FINISHED", "CANCELLED", "FAILED")
    try:
        rows = db.query(f"SHOW BACKUP FROM {database_name}")

        if not rows:
            return None

        snapshot_name, state = _extract_backup_info(rows[0])

        if snapshot_name != label:
            return None

        return "stale" if state in terminal_states else "active"

    except Exception:
        return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _extract_backup_info(result) -> Tuple[str, str]:
|
|
143
|
+
"""Extract snapshot name and state from SHOW BACKUP result."""
|
|
144
|
+
if isinstance(result, dict):
|
|
145
|
+
snapshot_name = result.get("SnapshotName", "")
|
|
146
|
+
state = result.get("State", "UNKNOWN")
|
|
147
|
+
else:
|
|
148
|
+
snapshot_name = result[1] if len(result) > 1 else ""
|
|
149
|
+
state = result[3] if len(result) > 3 else "UNKNOWN"
|
|
150
|
+
|
|
151
|
+
return snapshot_name, state
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _cleanup_stale_job(db, scope: str, label: str) -> None:
|
|
155
|
+
"""Clean up a stale job by updating its state to CANCELLED."""
|
|
156
|
+
sql = (
|
|
157
|
+
"UPDATE ops.run_status SET state='CANCELLED', finished_at=NOW() "
|
|
158
|
+
"WHERE scope='%s' AND label='%s' AND state='ACTIVE'" % (scope, label)
|
|
159
|
+
)
|
|
160
|
+
db.execute(sql)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def complete_job_slot(
    db,
    scope: str,
    label: str,
    final_state: Literal['FINISHED', 'FAILED', 'CANCELLED']
) -> None:
    """Complete a job slot and persist its final state.

    Simple approach: update the same ops.run_status row by scope/label.
    `final_state` is constrained by the Literal type; scope and label are
    single-quote-escaped before interpolation because db.execute() takes
    no parameters and `label` may contain user-supplied text (--name),
    making raw %-interpolation an SQL injection vector.
    """
    safe_scope = scope.replace("'", "''")
    safe_label = label.replace("'", "''")
    sql = (
        "UPDATE ops.run_status SET state='%s', finished_at=NOW() WHERE scope='%s' AND label='%s'"
        % (final_state, safe_scope, safe_label)
    )
    db.execute(sql)
|
starrocks_br/config.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
from typing import Dict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def load_config(config_path: str) -> Dict:
    """Load and parse a YAML configuration file.

    Args:
        config_path: Path to the YAML config file

    Returns:
        Dictionary containing the configuration

    Raises:
        FileNotFoundError: If the config file doesn't exist
        yaml.YAMLError: If the config file is not valid YAML
        ValueError: If the parsed document is not a mapping
    """
    with open(config_path, 'r') as handle:
        parsed = yaml.safe_load(handle)

    if isinstance(parsed, dict):
        return parsed

    # Covers empty files (None), scalars, and top-level lists.
    raise ValueError("Config must be a dictionary")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def validate_config(config: Dict) -> None:
    """Validate that the config contains every required connection field.

    Args:
        config: Configuration dictionary

    Raises:
        ValueError: On the first missing required field
    """
    for required in ('host', 'port', 'user', 'database', 'repository'):
        if required not in config:
            raise ValueError(f"Missing required config field: {required}")
|
|
41
|
+
|
starrocks_br/db.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import mysql.connector
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class StarRocksDB:
    """Database connection wrapper for StarRocks (MySQL wire protocol).

    The connection is opened lazily: construction stores credentials only;
    the socket is opened on first execute()/query() or on context entry.
    """

    def __init__(self, host: str, port: int, user: str, password: str, database: str):
        """Initialize connection parameters (no connection is opened yet).

        Args:
            host: Database host
            port: Database port (FE MySQL port)
            user: Database user
            password: Database password
            database: Default database name
        """
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self._connection = None  # lazily opened by connect()

    def connect(self) -> None:
        """Establish the database connection."""
        self._connection = mysql.connector.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )

    def close(self) -> None:
        """Close the database connection; safe to call when not connected."""
        if self._connection:
            self._connection.close()
            self._connection = None

    def execute(self, sql: str) -> None:
        """Execute a SQL statement that doesn't return results.

        Connects on demand and commits after execution.

        Args:
            sql: SQL statement to execute
        """
        if not self._connection:
            self.connect()

        cursor = self._connection.cursor()
        try:
            cursor.execute(sql)
            self._connection.commit()
        finally:
            # Release the cursor even when execution raises.
            cursor.close()

    def query(self, sql: str, params: Optional[tuple] = None) -> List[tuple]:
        """Execute a SQL query and return all result rows.

        Args:
            sql: SQL query to execute
            params: Optional tuple of parameters for parameterized queries
                (annotation fixed: the default is None, so the type is
                Optional[tuple], not tuple)

        Returns:
            List of tuples containing query results
        """
        if not self._connection:
            self.connect()

        cursor = self._connection.cursor()
        try:
            if params:
                cursor.execute(sql, params)
            else:
                cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()

    def __enter__(self):
        """Context manager entry: open the connection."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection (exceptions propagate)."""
        self.close()
|
|
88
|
+
|