starrocks-br 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/__init__.py +14 -0
- starrocks_br/cli.py +117 -36
- starrocks_br/concurrency.py +47 -23
- starrocks_br/config.py +89 -0
- starrocks_br/db.py +14 -0
- starrocks_br/error_handler.py +73 -12
- starrocks_br/exceptions.py +29 -0
- starrocks_br/executor.py +29 -6
- starrocks_br/health.py +15 -0
- starrocks_br/history.py +23 -9
- starrocks_br/labels.py +19 -3
- starrocks_br/logger.py +14 -0
- starrocks_br/planner.py +70 -13
- starrocks_br/repository.py +15 -1
- starrocks_br/restore.py +211 -40
- starrocks_br/schema.py +103 -43
- starrocks_br/timezone.py +14 -0
- starrocks_br/utils.py +15 -0
- {starrocks_br-0.5.1.dist-info → starrocks_br-0.6.0.dist-info}/METADATA +34 -19
- starrocks_br-0.6.0.dist-info/RECORD +24 -0
- {starrocks_br-0.5.1.dist-info → starrocks_br-0.6.0.dist-info}/WHEEL +1 -1
- starrocks_br-0.6.0.dist-info/licenses/LICENSE +201 -0
- starrocks_br-0.5.1.dist-info/RECORD +0 -23
- {starrocks_br-0.5.1.dist-info → starrocks_br-0.6.0.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.5.1.dist-info → starrocks_br-0.6.0.dist-info}/top_level.txt +0 -0
starrocks_br/__init__.py
CHANGED
|
@@ -1 +1,15 @@
|
|
|
1
|
+
# Copyright 2025 deep-bi
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
1
15
|
__all__ = ["cli", "config"]
|
starrocks_br/cli.py
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# Copyright 2025 deep-bi
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
1
15
|
import os
|
|
2
16
|
import sys
|
|
3
17
|
|
|
@@ -84,13 +98,13 @@ def cli(ctx, verbose):
|
|
|
84
98
|
@cli.command("init")
|
|
85
99
|
@click.option("--config", required=True, help="Path to config YAML file")
|
|
86
100
|
def init(config):
|
|
87
|
-
"""Initialize
|
|
101
|
+
"""Initialize operations database and control tables.
|
|
88
102
|
|
|
89
|
-
Creates the
|
|
90
|
-
-
|
|
91
|
-
-
|
|
92
|
-
-
|
|
93
|
-
-
|
|
103
|
+
Creates the operations database (default: 'ops') with required tables:
|
|
104
|
+
- table_inventory: Inventory groups mapping to databases/tables
|
|
105
|
+
- backup_history: Backup operation history
|
|
106
|
+
- restore_history: Restore operation history
|
|
107
|
+
- run_status: Job concurrency control
|
|
94
108
|
|
|
95
109
|
Run this once before using backup/restore commands.
|
|
96
110
|
"""
|
|
@@ -98,6 +112,9 @@ def init(config):
|
|
|
98
112
|
cfg = config_module.load_config(config)
|
|
99
113
|
config_module.validate_config(cfg)
|
|
100
114
|
|
|
115
|
+
ops_database = config_module.get_ops_database(cfg)
|
|
116
|
+
table_inventory_entries = config_module.get_table_inventory_entries(cfg)
|
|
117
|
+
|
|
101
118
|
database = db.StarRocksDB(
|
|
102
119
|
host=cfg["host"],
|
|
103
120
|
port=cfg["port"],
|
|
@@ -107,23 +124,43 @@ def init(config):
|
|
|
107
124
|
tls_config=cfg.get("tls"),
|
|
108
125
|
)
|
|
109
126
|
|
|
127
|
+
ops_database = config_module.get_ops_database(cfg)
|
|
128
|
+
|
|
110
129
|
with database:
|
|
111
|
-
logger.info("
|
|
112
|
-
|
|
113
|
-
logger.info("")
|
|
114
|
-
logger.info("Next steps:")
|
|
115
|
-
logger.info("1. Insert your table inventory records:")
|
|
116
|
-
logger.info(" INSERT INTO ops.table_inventory")
|
|
117
|
-
logger.info(" (inventory_group, database_name, table_name)")
|
|
118
|
-
logger.info(" VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
|
|
119
|
-
logger.info(" VALUES ('my_full_database_backup', 'your_db', '*');")
|
|
120
|
-
logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
|
|
121
|
-
logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
|
|
130
|
+
logger.info("Validating repository...")
|
|
131
|
+
repository.ensure_repository(database, cfg["repository"])
|
|
122
132
|
logger.info("")
|
|
123
|
-
|
|
124
|
-
logger.info(
|
|
125
|
-
|
|
133
|
+
|
|
134
|
+
logger.info("Initializing ops schema...")
|
|
135
|
+
schema.initialize_ops_schema(
|
|
136
|
+
database, ops_database=ops_database, table_inventory_entries=table_inventory_entries
|
|
126
137
|
)
|
|
138
|
+
logger.info("")
|
|
139
|
+
|
|
140
|
+
if table_inventory_entries:
|
|
141
|
+
logger.success(
|
|
142
|
+
f"Table inventory bootstrapped from config with {len(table_inventory_entries)} entries"
|
|
143
|
+
)
|
|
144
|
+
logger.info("")
|
|
145
|
+
logger.info("Next steps:")
|
|
146
|
+
logger.info("1. Run your first backup:")
|
|
147
|
+
logger.info(
|
|
148
|
+
f" starrocks-br backup incremental --group <your_group_name> --config {config}"
|
|
149
|
+
)
|
|
150
|
+
else:
|
|
151
|
+
logger.info("Next steps:")
|
|
152
|
+
logger.info("1. Insert your table inventory records:")
|
|
153
|
+
logger.info(f" INSERT INTO {ops_database}.table_inventory")
|
|
154
|
+
logger.info(" (inventory_group, database_name, table_name)")
|
|
155
|
+
logger.info(" VALUES ('my_daily_incremental', 'your_db', 'your_fact_table');")
|
|
156
|
+
logger.info(" VALUES ('my_full_database_backup', 'your_db', '*');")
|
|
157
|
+
logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_customers');")
|
|
158
|
+
logger.info(" VALUES ('my_full_dimension_tables', 'your_db', 'dim_products');")
|
|
159
|
+
logger.info("")
|
|
160
|
+
logger.info("2. Run your first backup:")
|
|
161
|
+
logger.info(
|
|
162
|
+
" starrocks-br backup incremental --group my_daily_incremental --config config.yaml"
|
|
163
|
+
)
|
|
127
164
|
|
|
128
165
|
except exceptions.ConfigFileNotFoundError as e:
|
|
129
166
|
error_handler.handle_config_file_not_found_error(e)
|
|
@@ -182,13 +219,17 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
182
219
|
tls_config=cfg.get("tls"),
|
|
183
220
|
)
|
|
184
221
|
|
|
222
|
+
ops_database = config_module.get_ops_database(cfg)
|
|
223
|
+
|
|
185
224
|
with database:
|
|
186
|
-
was_created = schema.ensure_ops_schema(database)
|
|
225
|
+
was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
|
|
187
226
|
if was_created:
|
|
188
227
|
logger.warning(
|
|
189
228
|
"ops schema was auto-created. Please run 'starrocks-br init' after populating config."
|
|
190
229
|
)
|
|
191
|
-
logger.warning(
|
|
230
|
+
logger.warning(
|
|
231
|
+
"Remember to populate the table_inventory table with your backup groups!"
|
|
232
|
+
)
|
|
192
233
|
sys.exit(1) # Exit if schema was just created, requires user action
|
|
193
234
|
|
|
194
235
|
healthy, message = health.check_cluster_health(database)
|
|
@@ -207,6 +248,7 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
207
248
|
backup_type="incremental",
|
|
208
249
|
database_name=cfg["database"],
|
|
209
250
|
custom_name=name,
|
|
251
|
+
ops_database=ops_database,
|
|
210
252
|
)
|
|
211
253
|
|
|
212
254
|
logger.success(f"Generated label: {label}")
|
|
@@ -225,7 +267,11 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
225
267
|
)
|
|
226
268
|
|
|
227
269
|
partitions = planner.find_recent_partitions(
|
|
228
|
-
database,
|
|
270
|
+
database,
|
|
271
|
+
cfg["database"],
|
|
272
|
+
baseline_backup_label=baseline_backup,
|
|
273
|
+
group_name=group,
|
|
274
|
+
ops_database=ops_database,
|
|
229
275
|
)
|
|
230
276
|
|
|
231
277
|
if not partitions:
|
|
@@ -238,9 +284,11 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
238
284
|
partitions, cfg["repository"], label, cfg["database"]
|
|
239
285
|
)
|
|
240
286
|
|
|
241
|
-
concurrency.reserve_job_slot(
|
|
287
|
+
concurrency.reserve_job_slot(
|
|
288
|
+
database, scope="backup", label=label, ops_database=ops_database
|
|
289
|
+
)
|
|
242
290
|
|
|
243
|
-
planner.record_backup_partitions(database, label, partitions)
|
|
291
|
+
planner.record_backup_partitions(database, label, partitions, ops_database=ops_database)
|
|
244
292
|
|
|
245
293
|
logger.success("Job slot reserved")
|
|
246
294
|
logger.info(f"Starting incremental backup for group '{group}'...")
|
|
@@ -251,6 +299,7 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
251
299
|
backup_type="incremental",
|
|
252
300
|
scope="backup",
|
|
253
301
|
database=cfg["database"],
|
|
302
|
+
ops_database=ops_database,
|
|
254
303
|
)
|
|
255
304
|
|
|
256
305
|
if result["success"]:
|
|
@@ -274,7 +323,7 @@ def backup_incremental(config, baseline_backup, group, name):
|
|
|
274
323
|
if state == "LOST":
|
|
275
324
|
logger.critical("Backup tracking lost!")
|
|
276
325
|
logger.warning("Another backup operation started during ours.")
|
|
277
|
-
logger.tip("Enable
|
|
326
|
+
logger.tip("Enable run_status concurrency checks to prevent this.")
|
|
278
327
|
logger.error(f"{result['error_message']}")
|
|
279
328
|
sys.exit(1)
|
|
280
329
|
|
|
@@ -334,13 +383,17 @@ def backup_full(config, group, name):
|
|
|
334
383
|
tls_config=cfg.get("tls"),
|
|
335
384
|
)
|
|
336
385
|
|
|
386
|
+
ops_database = config_module.get_ops_database(cfg)
|
|
387
|
+
|
|
337
388
|
with database:
|
|
338
|
-
was_created = schema.ensure_ops_schema(database)
|
|
389
|
+
was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
|
|
339
390
|
if was_created:
|
|
340
391
|
logger.warning(
|
|
341
392
|
"ops schema was auto-created. Please run 'starrocks-br init' after populating config."
|
|
342
393
|
)
|
|
343
|
-
logger.warning(
|
|
394
|
+
logger.warning(
|
|
395
|
+
"Remember to populate the table_inventory table with your backup groups!"
|
|
396
|
+
)
|
|
344
397
|
sys.exit(1) # Exit if schema was just created, requires user action
|
|
345
398
|
|
|
346
399
|
healthy, message = health.check_cluster_health(database)
|
|
@@ -355,13 +408,25 @@ def backup_full(config, group, name):
|
|
|
355
408
|
logger.success(f"Repository '{cfg['repository']}' verified")
|
|
356
409
|
|
|
357
410
|
label = labels.determine_backup_label(
|
|
358
|
-
db=database,
|
|
411
|
+
db=database,
|
|
412
|
+
backup_type="full",
|
|
413
|
+
database_name=cfg["database"],
|
|
414
|
+
custom_name=name,
|
|
415
|
+
ops_database=ops_database,
|
|
359
416
|
)
|
|
360
417
|
|
|
361
418
|
logger.success(f"Generated label: {label}")
|
|
362
419
|
|
|
420
|
+
tables = planner.find_tables_by_group(database, group, ops_database)
|
|
421
|
+
planner.validate_tables_exist(database, cfg["database"], tables, group)
|
|
422
|
+
|
|
363
423
|
backup_command = planner.build_full_backup_command(
|
|
364
|
-
database,
|
|
424
|
+
database,
|
|
425
|
+
group,
|
|
426
|
+
cfg["repository"],
|
|
427
|
+
label,
|
|
428
|
+
cfg["database"],
|
|
429
|
+
ops_database=ops_database,
|
|
365
430
|
)
|
|
366
431
|
|
|
367
432
|
if not backup_command:
|
|
@@ -375,9 +440,13 @@ def backup_full(config, group, name):
|
|
|
375
440
|
database, cfg["database"], tables
|
|
376
441
|
)
|
|
377
442
|
|
|
378
|
-
concurrency.reserve_job_slot(
|
|
443
|
+
concurrency.reserve_job_slot(
|
|
444
|
+
database, scope="backup", label=label, ops_database=ops_database
|
|
445
|
+
)
|
|
379
446
|
|
|
380
|
-
planner.record_backup_partitions(
|
|
447
|
+
planner.record_backup_partitions(
|
|
448
|
+
database, label, all_partitions, ops_database=ops_database
|
|
449
|
+
)
|
|
381
450
|
|
|
382
451
|
logger.success("Job slot reserved")
|
|
383
452
|
logger.info(f"Starting full backup for group '{group}'...")
|
|
@@ -388,6 +457,7 @@ def backup_full(config, group, name):
|
|
|
388
457
|
backup_type="full",
|
|
389
458
|
scope="backup",
|
|
390
459
|
database=cfg["database"],
|
|
460
|
+
ops_database=ops_database,
|
|
391
461
|
)
|
|
392
462
|
|
|
393
463
|
if result["success"]:
|
|
@@ -405,10 +475,13 @@ def backup_full(config, group, name):
|
|
|
405
475
|
if state == "LOST":
|
|
406
476
|
logger.critical("Backup tracking lost!")
|
|
407
477
|
logger.warning("Another backup operation started during ours.")
|
|
408
|
-
logger.tip("Enable
|
|
478
|
+
logger.tip("Enable run_status concurrency checks to prevent this.")
|
|
409
479
|
logger.error(f"{result['error_message']}")
|
|
410
480
|
sys.exit(1)
|
|
411
481
|
|
|
482
|
+
except exceptions.InvalidTablesInInventoryError as e:
|
|
483
|
+
error_handler.handle_invalid_tables_in_inventory_error(e, config)
|
|
484
|
+
sys.exit(1)
|
|
412
485
|
except exceptions.ConcurrencyConflictError as e:
|
|
413
486
|
error_handler.handle_concurrency_conflict_error(e, config)
|
|
414
487
|
sys.exit(1)
|
|
@@ -485,13 +558,17 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
|
485
558
|
tls_config=cfg.get("tls"),
|
|
486
559
|
)
|
|
487
560
|
|
|
561
|
+
ops_database = config_module.get_ops_database(cfg)
|
|
562
|
+
|
|
488
563
|
with database:
|
|
489
|
-
was_created = schema.ensure_ops_schema(database)
|
|
564
|
+
was_created = schema.ensure_ops_schema(database, ops_database=ops_database)
|
|
490
565
|
if was_created:
|
|
491
566
|
logger.warning(
|
|
492
567
|
"ops schema was auto-created. Please run 'starrocks-br init' after populating config."
|
|
493
568
|
)
|
|
494
|
-
logger.warning(
|
|
569
|
+
logger.warning(
|
|
570
|
+
"Remember to populate the table_inventory table with your backup groups!"
|
|
571
|
+
)
|
|
495
572
|
sys.exit(1) # Exit if schema was just created, requires user action
|
|
496
573
|
|
|
497
574
|
healthy, message = health.check_cluster_health(database)
|
|
@@ -507,7 +584,9 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
|
507
584
|
|
|
508
585
|
logger.info(f"Finding restore sequence for target backup: {target_label}")
|
|
509
586
|
|
|
510
|
-
restore_pair = restore.find_restore_pair(
|
|
587
|
+
restore_pair = restore.find_restore_pair(
|
|
588
|
+
database, target_label, ops_database=ops_database
|
|
589
|
+
)
|
|
511
590
|
logger.success(f"Found restore sequence: {' -> '.join(restore_pair)}")
|
|
512
591
|
|
|
513
592
|
logger.info("Determining tables to restore from backup manifest...")
|
|
@@ -518,6 +597,7 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
|
518
597
|
group=group,
|
|
519
598
|
table=table,
|
|
520
599
|
database=cfg["database"] if table else None,
|
|
600
|
+
ops_database=ops_database,
|
|
521
601
|
)
|
|
522
602
|
|
|
523
603
|
if not tables_to_restore:
|
|
@@ -535,6 +615,7 @@ def restore_command(config, target_label, group, table, rename_suffix, yes):
|
|
|
535
615
|
tables_to_restore,
|
|
536
616
|
rename_suffix,
|
|
537
617
|
skip_confirmation=yes,
|
|
618
|
+
ops_database=ops_database,
|
|
538
619
|
)
|
|
539
620
|
|
|
540
621
|
if result["success"]:
|
starrocks_br/concurrency.py
CHANGED
|
@@ -1,47 +1,67 @@
|
|
|
1
|
+
# Copyright 2025 deep-bi
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
1
15
|
from typing import Literal
|
|
2
16
|
|
|
3
17
|
from . import exceptions, logger, utils
|
|
4
18
|
|
|
5
19
|
|
|
6
|
-
def reserve_job_slot(db, scope: str, label: str) -> None:
|
|
7
|
-
"""Reserve a job slot in
|
|
20
|
+
def reserve_job_slot(db, scope: str, label: str, ops_database: str = "ops") -> None:
|
|
21
|
+
"""Reserve a job slot in the run_status table to prevent overlapping jobs.
|
|
8
22
|
|
|
9
23
|
We consider any row with state='ACTIVE' for the same scope as a conflict.
|
|
10
24
|
However, we implement self-healing logic to automatically clean up stale locks.
|
|
11
25
|
"""
|
|
12
|
-
active_jobs = _get_active_jobs_for_scope(db, scope)
|
|
26
|
+
active_jobs = _get_active_jobs_for_scope(db, scope, ops_database)
|
|
13
27
|
|
|
14
28
|
if not active_jobs:
|
|
15
|
-
_insert_new_job(db, scope, label)
|
|
29
|
+
_insert_new_job(db, scope, label, ops_database)
|
|
16
30
|
return
|
|
17
31
|
|
|
18
|
-
_handle_active_job_conflicts(db, scope, active_jobs)
|
|
32
|
+
_handle_active_job_conflicts(db, scope, active_jobs, ops_database)
|
|
19
33
|
|
|
20
|
-
_insert_new_job(db, scope, label)
|
|
34
|
+
_insert_new_job(db, scope, label, ops_database)
|
|
21
35
|
|
|
22
36
|
|
|
23
|
-
def _get_active_jobs_for_scope(
|
|
37
|
+
def _get_active_jobs_for_scope(
|
|
38
|
+
db, scope: str, ops_database: str = "ops"
|
|
39
|
+
) -> list[tuple[str, str, str]]:
|
|
24
40
|
"""Get all active jobs for the given scope."""
|
|
25
|
-
rows = db.query(
|
|
41
|
+
rows = db.query(
|
|
42
|
+
f"SELECT scope, label, state FROM {ops_database}.run_status WHERE state = 'ACTIVE'"
|
|
43
|
+
)
|
|
26
44
|
return [row for row in rows if row[0] == scope]
|
|
27
45
|
|
|
28
46
|
|
|
29
|
-
def _handle_active_job_conflicts(
|
|
47
|
+
def _handle_active_job_conflicts(
|
|
48
|
+
db, scope: str, active_jobs: list[tuple[str, str, str]], ops_database: str = "ops"
|
|
49
|
+
) -> None:
|
|
30
50
|
"""Handle conflicts with active jobs, cleaning up stale ones where possible."""
|
|
31
51
|
for active_scope, active_label, _ in active_jobs:
|
|
32
|
-
if _can_heal_stale_job(active_scope, active_label, db):
|
|
33
|
-
_cleanup_stale_job(db, active_scope, active_label)
|
|
52
|
+
if _can_heal_stale_job(active_scope, active_label, db, ops_database):
|
|
53
|
+
_cleanup_stale_job(db, active_scope, active_label, ops_database)
|
|
34
54
|
logger.success(f"Cleaned up stale backup job: {active_label}")
|
|
35
55
|
else:
|
|
36
56
|
_raise_concurrency_conflict(scope, active_jobs)
|
|
37
57
|
|
|
38
58
|
|
|
39
|
-
def _can_heal_stale_job(scope: str, label: str, db) -> bool:
|
|
59
|
+
def _can_heal_stale_job(scope: str, label: str, db, ops_database: str = "ops") -> bool:
|
|
40
60
|
"""Check if a stale job can be healed (only for backup jobs)."""
|
|
41
61
|
if scope != "backup":
|
|
42
62
|
return False
|
|
43
63
|
|
|
44
|
-
return _is_backup_job_stale(db, label)
|
|
64
|
+
return _is_backup_job_stale(db, label, ops_database)
|
|
45
65
|
|
|
46
66
|
|
|
47
67
|
def _raise_concurrency_conflict(scope: str, active_jobs: list[tuple[str, str, str]]) -> None:
|
|
@@ -49,22 +69,22 @@ def _raise_concurrency_conflict(scope: str, active_jobs: list[tuple[str, str, st
|
|
|
49
69
|
raise exceptions.ConcurrencyConflictError(scope, active_jobs)
|
|
50
70
|
|
|
51
71
|
|
|
52
|
-
def _insert_new_job(db, scope: str, label: str) -> None:
|
|
72
|
+
def _insert_new_job(db, scope: str, label: str, ops_database: str = "ops") -> None:
|
|
53
73
|
"""Insert a new active job record."""
|
|
54
74
|
sql = f"""
|
|
55
|
-
INSERT INTO
|
|
75
|
+
INSERT INTO {ops_database}.run_status (scope, label, state, started_at)
|
|
56
76
|
VALUES ({utils.quote_value(scope)}, {utils.quote_value(label)}, 'ACTIVE', NOW())
|
|
57
77
|
"""
|
|
58
78
|
db.execute(sql)
|
|
59
79
|
|
|
60
80
|
|
|
61
|
-
def _is_backup_job_stale(db, label: str) -> bool:
|
|
81
|
+
def _is_backup_job_stale(db, label: str, ops_database: str = "ops") -> bool:
|
|
62
82
|
"""Check if a backup job is stale by querying StarRocks SHOW BACKUP.
|
|
63
83
|
|
|
64
84
|
Returns True if the job is stale (not actually running), False if it's still active.
|
|
65
85
|
"""
|
|
66
86
|
try:
|
|
67
|
-
user_databases = _get_user_databases(db)
|
|
87
|
+
user_databases = _get_user_databases(db, ops_database)
|
|
68
88
|
|
|
69
89
|
for database_name in user_databases:
|
|
70
90
|
job_status = _check_backup_job_in_database(db, database_name, label)
|
|
@@ -84,9 +104,9 @@ def _is_backup_job_stale(db, label: str) -> bool:
|
|
|
84
104
|
return False
|
|
85
105
|
|
|
86
106
|
|
|
87
|
-
def _get_user_databases(db) -> list[str]:
|
|
107
|
+
def _get_user_databases(db, ops_database: str = "ops") -> list[str]:
|
|
88
108
|
"""Get list of user databases (excluding system databases)."""
|
|
89
|
-
system_databases = {"information_schema", "mysql", "sys",
|
|
109
|
+
system_databases = {"information_schema", "mysql", "sys", ops_database}
|
|
90
110
|
|
|
91
111
|
databases = db.query("SHOW DATABASES")
|
|
92
112
|
return [
|
|
@@ -145,10 +165,10 @@ def _extract_backup_info(result) -> tuple[str, str]:
|
|
|
145
165
|
return snapshot_name, state
|
|
146
166
|
|
|
147
167
|
|
|
148
|
-
def _cleanup_stale_job(db, scope: str, label: str) -> None:
|
|
168
|
+
def _cleanup_stale_job(db, scope: str, label: str, ops_database: str = "ops") -> None:
|
|
149
169
|
"""Clean up a stale job by updating its state to CANCELLED."""
|
|
150
170
|
sql = f"""
|
|
151
|
-
UPDATE
|
|
171
|
+
UPDATE {ops_database}.run_status
|
|
152
172
|
SET state='CANCELLED', finished_at=NOW()
|
|
153
173
|
WHERE scope={utils.quote_value(scope)} AND label={utils.quote_value(label)} AND state='ACTIVE'
|
|
154
174
|
"""
|
|
@@ -156,14 +176,18 @@ def _cleanup_stale_job(db, scope: str, label: str) -> None:
|
|
|
156
176
|
|
|
157
177
|
|
|
158
178
|
def complete_job_slot(
|
|
159
|
-
db,
|
|
179
|
+
db,
|
|
180
|
+
scope: str,
|
|
181
|
+
label: str,
|
|
182
|
+
final_state: Literal["FINISHED", "FAILED", "CANCELLED"],
|
|
183
|
+
ops_database: str = "ops",
|
|
160
184
|
) -> None:
|
|
161
185
|
"""Complete job slot and persist final state.
|
|
162
186
|
|
|
163
187
|
Simple approach: update the same row by scope/label.
|
|
164
188
|
"""
|
|
165
189
|
sql = f"""
|
|
166
|
-
UPDATE
|
|
190
|
+
UPDATE {ops_database}.run_status
|
|
167
191
|
SET state={utils.quote_value(final_state)}, finished_at=NOW()
|
|
168
192
|
WHERE scope={utils.quote_value(scope)} AND label={utils.quote_value(label)}
|
|
169
193
|
"""
|
starrocks_br/config.py
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# Copyright 2025 deep-bi
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
1
15
|
from typing import Any
|
|
2
16
|
|
|
3
17
|
import yaml
|
|
@@ -43,6 +57,34 @@ def validate_config(config: dict[str, Any]) -> None:
|
|
|
43
57
|
raise exceptions.ConfigValidationError(f"Missing required config field: {field}")
|
|
44
58
|
|
|
45
59
|
_validate_tls_section(config.get("tls"))
|
|
60
|
+
_validate_table_inventory_section(config.get("table_inventory"))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_ops_database(config: dict[str, Any]) -> str:
|
|
64
|
+
"""Get the ops database name from config, defaulting to 'ops'."""
|
|
65
|
+
return config.get("ops_database", "ops")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_table_inventory_entries(config: dict[str, Any]) -> list[tuple[str, str, str]]:
|
|
69
|
+
"""Extract table inventory entries from config.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
config: Configuration dictionary
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
List of tuples (group, database, table)
|
|
76
|
+
"""
|
|
77
|
+
table_inventory = config.get("table_inventory")
|
|
78
|
+
if not table_inventory:
|
|
79
|
+
return []
|
|
80
|
+
|
|
81
|
+
entries = []
|
|
82
|
+
for group_entry in table_inventory:
|
|
83
|
+
group = group_entry["group"]
|
|
84
|
+
for table_entry in group_entry["tables"]:
|
|
85
|
+
entries.append((group, table_entry["database"], table_entry["table"]))
|
|
86
|
+
|
|
87
|
+
return entries
|
|
46
88
|
|
|
47
89
|
|
|
48
90
|
def _validate_tls_section(tls_config) -> None:
|
|
@@ -74,3 +116,50 @@ def _validate_tls_section(tls_config) -> None:
|
|
|
74
116
|
raise exceptions.ConfigValidationError(
|
|
75
117
|
"TLS configuration field 'tls_versions' must be a list of strings if provided"
|
|
76
118
|
)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _validate_table_inventory_section(table_inventory) -> None:
|
|
122
|
+
if table_inventory is None:
|
|
123
|
+
return
|
|
124
|
+
|
|
125
|
+
if not isinstance(table_inventory, list):
|
|
126
|
+
raise exceptions.ConfigValidationError("'table_inventory' must be a list")
|
|
127
|
+
|
|
128
|
+
for entry in table_inventory:
|
|
129
|
+
if not isinstance(entry, dict):
|
|
130
|
+
raise exceptions.ConfigValidationError(
|
|
131
|
+
"Each entry in 'table_inventory' must be a dictionary"
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
if "group" not in entry:
|
|
135
|
+
raise exceptions.ConfigValidationError(
|
|
136
|
+
"Each entry in 'table_inventory' must have a 'group' field"
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
if "tables" not in entry:
|
|
140
|
+
raise exceptions.ConfigValidationError(
|
|
141
|
+
"Each entry in 'table_inventory' must have a 'tables' field"
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
if not isinstance(entry["group"], str):
|
|
145
|
+
raise exceptions.ConfigValidationError("'group' field must be a string")
|
|
146
|
+
|
|
147
|
+
tables = entry["tables"]
|
|
148
|
+
if not isinstance(tables, list):
|
|
149
|
+
raise exceptions.ConfigValidationError("'tables' field must be a list")
|
|
150
|
+
|
|
151
|
+
for table_entry in tables:
|
|
152
|
+
if not isinstance(table_entry, dict):
|
|
153
|
+
raise exceptions.ConfigValidationError("Each table entry must be a dictionary")
|
|
154
|
+
|
|
155
|
+
if "database" not in table_entry or "table" not in table_entry:
|
|
156
|
+
raise exceptions.ConfigValidationError(
|
|
157
|
+
"Each table entry must have 'database' and 'table' fields"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
if not isinstance(table_entry["database"], str) or not isinstance(
|
|
161
|
+
table_entry["table"], str
|
|
162
|
+
):
|
|
163
|
+
raise exceptions.ConfigValidationError(
|
|
164
|
+
"'database' and 'table' fields must be strings"
|
|
165
|
+
)
|
starrocks_br/db.py
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# Copyright 2025 deep-bi
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
1
15
|
from typing import Any
|
|
2
16
|
|
|
3
17
|
import mysql.connector
|