starrocks-br 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,33 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import click
2
16
 
17
+ from . import config as config_module
3
18
  from . import exceptions
4
19
 
5
20
 
21
+ def _get_ops_database_name(config_path: str | None) -> str:
22
+ if not config_path:
23
+ return "ops"
24
+ try:
25
+ cfg = config_module.load_config(config_path)
26
+ return config_module.get_ops_database(cfg)
27
+ except Exception:
28
+ return "ops"
29
+
30
+
6
31
  def display_structured_error(
7
32
  title: str,
8
33
  reason: str,
@@ -60,13 +85,14 @@ def handle_missing_option_error(exc: exceptions.MissingOptionError, config: str
60
85
  def handle_backup_label_not_found_error(
61
86
  exc: exceptions.BackupLabelNotFoundError, config: str = None
62
87
  ) -> None:
88
+ ops_db = _get_ops_database_name(config)
63
89
  display_structured_error(
64
90
  title="RESTORE FAILED",
65
91
  reason=f'The backup label "{exc.label}" does not exist in the repository'
66
92
  + (f' "{exc.repository}"' if exc.repository else "")
67
93
  + ",\nor the backup did not complete successfully.",
68
94
  what_to_do=[
69
- "List available backups by querying the backup history table:\n SELECT label, backup_type, status, finished_at FROM ops.backup_history ORDER BY finished_at DESC;",
95
+ f"List available backups by querying the backup history table:\n SELECT label, backup_type, status, finished_at FROM {ops_db}.backup_history ORDER BY finished_at DESC;",
70
96
  "Check whether the backup completed successfully using StarRocks SQL:"
71
97
  + (
72
98
  f"\n SHOW BACKUP FROM `{exc.repository}`;"
@@ -83,11 +109,12 @@ def handle_backup_label_not_found_error(
83
109
  def handle_no_successful_full_backup_found_error(
84
110
  exc: exceptions.NoSuccessfulFullBackupFoundError, config: str = None
85
111
  ) -> None:
112
+ ops_db = _get_ops_database_name(config)
86
113
  display_structured_error(
87
114
  title="RESTORE FAILED",
88
115
  reason=f'No successful full backup was found before the incremental backup "{exc.incremental_label}".\nIncremental backups require a base full backup to restore from.',
89
116
  what_to_do=[
90
- "Verify that a full backup was created before this incremental backup:\n SELECT label, backup_type, status, finished_at FROM ops.backup_history WHERE backup_type = 'full' AND status = 'FINISHED' ORDER BY finished_at DESC;",
117
+ f"Verify that a full backup was created before this incremental backup:\n SELECT label, backup_type, status, finished_at FROM {ops_db}.backup_history WHERE backup_type = 'full' AND status = 'FINISHED' ORDER BY finished_at DESC;",
91
118
  "Run a full backup first:\n starrocks-br backup full --config "
92
119
  + (config if config else "<config.yaml>")
93
120
  + " --group <group_name>",
@@ -101,13 +128,14 @@ def handle_no_successful_full_backup_found_error(
101
128
  def handle_table_not_found_in_backup_error(
102
129
  exc: exceptions.TableNotFoundInBackupError, config: str = None
103
130
  ) -> None:
131
+ ops_db = _get_ops_database_name(config)
104
132
  display_structured_error(
105
133
  title="TABLE NOT FOUND",
106
134
  reason=f'Table "{exc.table}" was not found in backup "{exc.label}" for database "{exc.database}".',
107
135
  what_to_do=[
108
136
  "List all tables in the backup:"
109
137
  + (
110
- f"\n SELECT DISTINCT database_name, table_name FROM ops.backup_partitions WHERE label = '{exc.label}';"
138
+ f"\n SELECT DISTINCT database_name, table_name FROM {ops_db}.backup_partitions WHERE label = '{exc.label}';"
111
139
  if config
112
140
  else ""
113
141
  ),
@@ -187,13 +215,14 @@ def handle_cluster_health_check_failed_error(
187
215
  def handle_snapshot_not_found_error(
188
216
  exc: exceptions.SnapshotNotFoundError, config: str = None
189
217
  ) -> None:
218
+ ops_db = _get_ops_database_name(config)
190
219
  display_structured_error(
191
220
  title="SNAPSHOT NOT FOUND",
192
221
  reason=f'Snapshot "{exc.snapshot_name}" was not found in repository "{exc.repository}".',
193
222
  what_to_do=[
194
223
  f"List available snapshots:\n SHOW SNAPSHOT ON {exc.repository};",
195
224
  "Verify the snapshot name spelling is correct",
196
- "Ensure the backup completed successfully:\n SELECT * FROM ops.backup_history WHERE label = '"
225
+ f"Ensure the backup completed successfully:\n SELECT * FROM {ops_db}.backup_history WHERE label = '"
197
226
  + exc.snapshot_name
198
227
  + "';",
199
228
  ],
@@ -205,13 +234,14 @@ def handle_snapshot_not_found_error(
205
234
  def handle_no_partitions_found_error(
206
235
  exc: exceptions.NoPartitionsFoundError, config: str = None, group: str = None
207
236
  ) -> None:
237
+ ops_db = _get_ops_database_name(config)
208
238
  display_structured_error(
209
239
  title="NO PARTITIONS FOUND",
210
240
  reason="No partitions were found to backup"
211
241
  + (f" for group '{exc.group_name}'" if exc.group_name else "")
212
242
  + ".",
213
243
  what_to_do=[
214
- "Verify that the inventory group exists in ops.table_inventory:\n SELECT * FROM ops.table_inventory WHERE inventory_group = "
244
+ f"Verify that the inventory group exists in {ops_db}.table_inventory:\n SELECT * FROM {ops_db}.table_inventory WHERE inventory_group = "
215
245
  + (f"'{exc.group_name}';" if exc.group_name else "'<your_group>';"),
216
246
  "Check that the tables in the group have partitions",
217
247
  "Ensure the baseline backup date is correct",
@@ -224,6 +254,7 @@ def handle_no_partitions_found_error(
224
254
  def handle_no_tables_found_error(
225
255
  exc: exceptions.NoTablesFoundError, config: str = None, target_label: str = None
226
256
  ) -> None:
257
+ ops_db = _get_ops_database_name(config)
227
258
  display_structured_error(
228
259
  title="NO TABLES FOUND",
229
260
  reason="No tables were found"
@@ -236,12 +267,12 @@ def handle_no_tables_found_error(
236
267
  )
237
268
  + ".",
238
269
  what_to_do=[
239
- "Verify that tables exist in the backup manifest:\n SELECT DISTINCT database_name, table_name FROM ops.backup_partitions WHERE label = "
270
+ f"Verify that tables exist in the backup manifest:\n SELECT DISTINCT database_name, table_name FROM {ops_db}.backup_partitions WHERE label = "
240
271
  + (f"'{exc.label}';" if exc.label else "'<label>';"),
241
- "Check that the group name is correct in ops.table_inventory"
272
+ f"Check that the group name is correct in {ops_db}.table_inventory"
242
273
  if exc.group
243
274
  else "Verify the backup completed successfully",
244
- "List available backups:\n SELECT label, backup_type, status, finished_at FROM ops.backup_history ORDER BY finished_at DESC;",
275
+ f"List available backups:\n SELECT label, backup_type, status, finished_at FROM {ops_db}.backup_history ORDER BY finished_at DESC;",
245
276
  ],
246
277
  inputs={
247
278
  "--target-label": exc.label or target_label,
@@ -268,6 +299,7 @@ def handle_restore_operation_cancelled_error() -> None:
268
299
  def handle_concurrency_conflict_error(
269
300
  exc: exceptions.ConcurrencyConflictError, config: str = None
270
301
  ) -> None:
302
+ ops_db = _get_ops_database_name(config)
271
303
  active_job_strings = [f"{job[0]}:{job[1]}" for job in exc.active_jobs]
272
304
  first_label = exc.active_labels[0] if exc.active_labels else "unknown"
273
305
 
@@ -276,8 +308,8 @@ def handle_concurrency_conflict_error(
276
308
  reason=f"Another '{exc.scope}' job is already running.\nOnly one job of the same type can run at a time to prevent conflicts.",
277
309
  what_to_do=[
278
310
  f"Wait for the active job to complete: {', '.join(active_job_strings)}",
279
- f"Check the job status in ops.run_status:\n SELECT * FROM ops.run_status WHERE label = '{first_label}' AND state = 'ACTIVE';",
280
- f"If the job is stuck, cancel it manually:\n UPDATE ops.run_status SET state = 'CANCELLED' WHERE label = '{first_label}' AND state = 'ACTIVE';",
311
+ f"Check the job status in {ops_db}.run_status:\n SELECT * FROM {ops_db}.run_status WHERE label = '{first_label}' AND state = 'ACTIVE';",
312
+ f"If the job is stuck, cancel it manually:\n UPDATE {ops_db}.run_status SET state = 'CANCELLED' WHERE label = '{first_label}' AND state = 'ACTIVE';",
281
313
  "Verify the job is not actually running in StarRocks before cancelling it",
282
314
  ],
283
315
  inputs={
@@ -285,13 +317,14 @@ def handle_concurrency_conflict_error(
285
317
  "Scope": exc.scope,
286
318
  "Active jobs": ", ".join(active_job_strings),
287
319
  },
288
- help_links=["Check ops.run_status table for job status"],
320
+ help_links=[f"Check {ops_db}.run_status table for job status"],
289
321
  )
290
322
 
291
323
 
292
324
  def handle_no_full_backup_found_error(
293
325
  exc: exceptions.NoFullBackupFoundError, config: str = None, group: str = None
294
326
  ) -> None:
327
+ ops_db = _get_ops_database_name(config)
295
328
  display_structured_error(
296
329
  title="NO FULL BACKUP FOUND",
297
330
  reason=f"No successful full backup was found for database '{exc.database}'.\nIncremental backups require a baseline full backup to compare against.",
@@ -299,9 +332,37 @@ def handle_no_full_backup_found_error(
299
332
  "Run a full backup first:\n starrocks-br backup full --config "
300
333
  + (config if config else "<config.yaml>")
301
334
  + f" --group {group if group else '<group_name>'}",
302
- f"Verify no full backups exist for this database:\n SELECT label, backup_type, status, finished_at FROM ops.backup_history WHERE backup_type = 'full' AND label LIKE '{exc.database}_%' ORDER BY finished_at DESC;",
335
+ f"Verify no full backups exist for this database:\n SELECT label, backup_type, status, finished_at FROM {ops_db}.backup_history WHERE backup_type = 'full' AND label LIKE '{exc.database}_%' ORDER BY finished_at DESC;",
303
336
  "After the full backup completes successfully, retry the incremental backup",
304
337
  ],
305
338
  inputs={"Database": exc.database, "--config": config, "--group": group},
306
339
  help_links=["starrocks-br backup full --help"],
307
340
  )
341
+
342
+
343
def handle_invalid_tables_in_inventory_error(
    exc: exceptions.InvalidTablesInInventoryError, config: str = None
) -> None:
    """Display the structured "INVALID TABLES IN INVENTORY" error for ``exc``.

    The SQL hints reference the ops database resolved from ``config`` via
    ``_get_ops_database_name``.

    Args:
        exc: The error carrying ``database``, ``invalid_tables`` and ``group``.
        config: Path to the configuration file, if one was supplied; it is
            echoed back in the "inputs" section of the message.
    """
    ops_db = _get_ops_database_name(config)
    database = exc.database
    # The same quoted, comma-separated list is used both for display and
    # inside the suggested ``IN (...)`` clause.
    quoted_tables = ", ".join([f"'{name}'" for name in exc.invalid_tables])

    reason_text = (
        f"The following table(s) in the inventory do not exist in database '{database}':\n"
        f"{quoted_tables}\n\n"
        "These tables are referenced in the table inventory but cannot be found in the actual database."
    )

    remediation_steps = [
        "Remove invalid tables from the table inventory:\n"
        f" DELETE FROM {ops_db}.table_inventory WHERE database_name = '{database}'"
        f" AND table_name IN ({quoted_tables});",
        "Verify the table names are spelled correctly in the inventory",
        f"Check which tables exist in the database:\n SHOW TABLES FROM `{database}`;",
        "Update the inventory with correct table names:\n"
        f" UPDATE {ops_db}.table_inventory SET table_name = '<correct_name>'"
        f" WHERE database_name = '{database}' AND table_name = '<wrong_name>';",
    ]

    provided_inputs = {
        "Database": database,
        "Invalid tables": quoted_tables,
        "Group": exc.group,
        "--config": config,
    }

    links = [
        f"Check {ops_db}.table_inventory for your inventory configuration",
        "Run 'SHOW TABLES' to see available tables",
    ]

    display_structured_error(
        title="INVALID TABLES IN INVENTORY",
        reason=reason_text,
        what_to_do=remediation_steps,
        inputs=provided_inputs,
        help_links=links,
    )
@@ -1,3 +1,18 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
1
16
  class StarRocksBRError(Exception):
2
17
  pass
3
18
 
@@ -107,3 +122,17 @@ class NoFullBackupFoundError(StarRocksBRError):
107
122
  def __init__(self, database: str):
108
123
  self.database = database
109
124
  super().__init__(f"No successful full backup found for database '{database}'")
125
+
126
+
127
class InvalidTablesInInventoryError(StarRocksBRError):
    """Error for inventory entries whose tables are missing from a database.

    Instance attributes set by ``__init__``:
        database: Name of the database that was checked.
        invalid_tables: List of the table names that were not found.
        group: Inventory group the tables came from, or ``None``.
    """

    def __init__(self, database: str, invalid_tables: list[str], group: str | None = None):
        """Build the error and its message.

        Args:
            database: Database the tables were expected in.
            invalid_tables: Names of the tables that do not exist.
            group: Optional inventory group; when truthy it is included in
                the message.
        """
        self.database = database
        # Snapshot into a list so that a one-shot iterable is not left
        # exhausted after building the message, and so later mutation of the
        # caller's list does not change what this exception reports.
        self.invalid_tables = list(invalid_tables)
        self.group = group
        tables_str = ", ".join(f"'{t}'" for t in self.invalid_tables)
        if group:
            message = (
                f"Invalid tables in inventory group '{group}' for database '{database}': {tables_str}"
            )
        else:
            message = f"Invalid tables for database '{database}': {tables_str}"
        super().__init__(message)
starrocks_br/executor.py CHANGED
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import re
2
16
  import time
3
17
  from typing import Literal
@@ -167,6 +181,7 @@ def execute_backup(
167
181
  backup_type: Literal["incremental", "full"] = None,
168
182
  scope: str = "backup",
169
183
  database: str | None = None,
184
+ ops_database: str = "ops",
170
185
  ) -> dict:
171
186
  """Execute a complete backup workflow: submit command and monitor progress.
172
187
 
@@ -179,6 +194,7 @@ def execute_backup(
179
194
  backup_type: Type of backup (for logging)
180
195
  scope: Job scope (for concurrency control)
181
196
  database: Database name (required for SHOW BACKUP)
197
+ ops_database: Name of ops database (default: "ops")
182
198
 
183
199
  Returns dictionary with keys: success, final_status, error_message
184
200
  """
@@ -219,13 +235,18 @@ def execute_backup(
219
235
  "finished_at": finished_at,
220
236
  "error_message": None if success else (final_status["state"] or ""),
221
237
  },
238
+ ops_database=ops_database,
222
239
  )
223
240
  except Exception:
224
241
  pass
225
242
 
226
243
  try:
227
244
  concurrency.complete_job_slot(
228
- db, scope=scope, label=label, final_state=final_status["state"]
245
+ db,
246
+ scope=scope,
247
+ label=label,
248
+ final_state=final_status["state"],
249
+ ops_database=ops_database,
229
250
  )
230
251
  except Exception:
231
252
  pass
@@ -257,7 +278,7 @@ def _build_error_message(final_status: dict, label: str, database: str) -> str:
257
278
  f"Backup tracking lost for '{label}' in database '{database}'. "
258
279
  f"Another backup operation overwrote the last backup status visible in SHOW BACKUP. "
259
280
  f"This indicates a concurrency issue - only one backup per database should run at a time. "
260
- f"Recommendation: Use ops.run_status concurrency control to prevent simultaneous backups, "
281
+ f"Recommendation: Use run_status concurrency control to prevent simultaneous backups, "
261
282
  f"or verify if another tool/user is running backups on this database."
262
283
  )
263
284
  elif state == "CANCELLED":
@@ -297,12 +318,11 @@ def _extract_label_from_command(backup_command: str) -> str:
297
318
  parts = line.split()
298
319
  for i, part in enumerate(parts):
299
320
  if part == "SNAPSHOT" and i + 1 < len(parts):
300
- return parts[i + 1]
321
+ return parts[i + 1].strip("`")
301
322
  elif line.startswith("BACKUP SNAPSHOT"):
302
- # Legacy syntax
303
323
  parts = line.split()
304
324
  if len(parts) >= 3:
305
- return parts[2]
325
+ return parts[2].strip("`")
306
326
 
307
327
  return "unknown_backup"
308
328
 
@@ -311,6 +331,9 @@ def _extract_database_from_command(backup_command: str) -> str:
311
331
  """Extract the database name from a backup command.
312
332
 
313
333
  Parses: BACKUP DATABASE db_name SNAPSHOT label ...
334
+
335
+ Strips backticks from identifiers since they are only used for
336
+ SQL quoting purposes.
314
337
  """
315
338
  lines = backup_command.strip().split("\n")
316
339
 
@@ -319,6 +342,6 @@ def _extract_database_from_command(backup_command: str) -> str:
319
342
  if line.startswith("BACKUP DATABASE"):
320
343
  parts = line.split()
321
344
  if len(parts) >= 3:
322
- return parts[2]
345
+ return parts[2].strip("`")
323
346
 
324
347
  return "unknown_database"
starrocks_br/health.py CHANGED
@@ -1,3 +1,18 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
1
16
  def check_cluster_health(db) -> tuple[bool, str]:
2
17
  """Check FE/BE health via SHOW FRONTENDS/BACKENDS.
3
18
 
starrocks_br/history.py CHANGED
@@ -1,8 +1,22 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  from . import logger
2
16
 
3
17
 
4
- def log_backup(db, entry: dict[str, str | None]) -> None:
5
- """Write a backup history entry to ops.backup_history.
18
+ def log_backup(db, entry: dict[str, str | None], ops_database: str = "ops") -> None:
19
+ """Write a backup history entry to the backup_history table.
6
20
 
7
21
  Expected keys in entry:
8
22
  - job_id (optional; auto-generated if missing)
@@ -28,7 +42,7 @@ def log_backup(db, entry: dict[str, str | None]) -> None:
28
42
  return "'" + str(val).replace("'", "''") + "'"
29
43
 
30
44
  sql = f"""
31
- INSERT INTO ops.backup_history (
45
+ INSERT INTO {ops_database}.backup_history (
32
46
  label, backup_type, status, repository, started_at, finished_at, error_message
33
47
  ) VALUES (
34
48
  {esc(label)}, {esc(backup_type)}, {esc(status)}, {esc(repository)},
@@ -43,8 +57,8 @@ def log_backup(db, entry: dict[str, str | None]) -> None:
43
57
  raise
44
58
 
45
59
 
46
- def log_restore(db, entry: dict[str, str | None]) -> None:
47
- """Write a restore history entry to ops.restore_history.
60
+ def log_restore(db, entry: dict[str, str | None], ops_database: str = "ops") -> None:
61
+ """Write a restore history entry to the restore_history table.
48
62
 
49
63
  Expected keys in entry:
50
64
  - job_id
@@ -73,12 +87,12 @@ def log_restore(db, entry: dict[str, str | None]) -> None:
73
87
  return "'" + str(val).replace("'", "''") + "'"
74
88
 
75
89
  sql = f"""
76
- INSERT INTO ops.restore_history (
77
- job_id, backup_label, restore_type, status, repository,
90
+ INSERT INTO {ops_database}.restore_history (
91
+ job_id, backup_label, restore_type, status, repository,
78
92
  started_at, finished_at, error_message, verification_checksum
79
93
  ) VALUES (
80
- {esc(job_id)}, {esc(backup_label)}, {esc(restore_type)}, {esc(status)},
81
- {esc(repository)}, {esc(started_at)}, {esc(finished_at)},
94
+ {esc(job_id)}, {esc(backup_label)}, {esc(restore_type)}, {esc(status)},
95
+ {esc(repository)}, {esc(started_at)}, {esc(finished_at)},
82
96
  {esc(error_message)}, {esc(verification_checksum)}
83
97
  )
84
98
  """
starrocks_br/labels.py CHANGED
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  from datetime import datetime
2
16
  from typing import Literal
3
17
 
@@ -7,12 +21,13 @@ def determine_backup_label(
7
21
  backup_type: Literal["incremental", "full"],
8
22
  database_name: str,
9
23
  custom_name: str | None = None,
24
+ ops_database: str = "ops",
10
25
  ) -> str:
11
26
  """Determine a unique backup label for the given parameters.
12
27
 
13
28
  This is the single entry point for all backup label generation. It handles both
14
29
  custom names and auto-generated date-based labels, ensuring uniqueness by checking
15
- the ops.backup_history table.
30
+ the backup_history table in the configured ops database.
16
31
 
17
32
  Args:
18
33
  db: Database connection
@@ -20,6 +35,7 @@ def determine_backup_label(
20
35
  database_name: Name of the database being backed up
21
36
  custom_name: Optional custom name for the backup. If provided, this becomes
22
37
  the base label. If None, generates a date-based label.
38
+ ops_database: Name of the database containing operational tables. Defaults to "ops".
23
39
 
24
40
  Returns:
25
41
  Unique label string that doesn't conflict with existing backups
@@ -30,9 +46,9 @@ def determine_backup_label(
30
46
  today = datetime.now().strftime("%Y%m%d")
31
47
  base_label = f"{database_name}_{today}_{backup_type}"
32
48
 
33
- query = """
49
+ query = f"""
34
50
  SELECT label
35
- FROM ops.backup_history
51
+ FROM {ops_database}.backup_history
36
52
  WHERE label LIKE %s
37
53
  ORDER BY label
38
54
  """
starrocks_br/logger.py CHANGED
@@ -1,3 +1,17 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import logging
2
16
  import threading
3
17
 
starrocks_br/planner.py CHANGED
@@ -1,10 +1,24 @@
1
+ # Copyright 2025 deep-bi
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
  import datetime
2
16
  import hashlib
3
17
 
4
18
  from starrocks_br import exceptions, logger, timezone, utils
5
19
 
6
20
 
7
- def find_latest_full_backup(db, database: str) -> dict[str, str] | None:
21
+ def find_latest_full_backup(db, database: str, ops_database: str = "ops") -> dict[str, str] | None:
8
22
  """Find the latest successful full backup for a database.
9
23
 
10
24
  Args:
@@ -17,7 +31,7 @@ def find_latest_full_backup(db, database: str) -> dict[str, str] | None:
17
31
  """
18
32
  query = f"""
19
33
  SELECT label, backup_type, finished_at
20
- FROM ops.backup_history
34
+ FROM {ops_database}.backup_history
21
35
  WHERE backup_type = 'full'
22
36
  AND status = 'FINISHED'
23
37
  AND label LIKE {utils.quote_value(f"{database}_%")}
@@ -42,7 +56,7 @@ def find_latest_full_backup(db, database: str) -> dict[str, str] | None:
42
56
  return {"label": row[0], "backup_type": row[1], "finished_at": finished_at}
43
57
 
44
58
 
45
- def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
59
+ def find_tables_by_group(db, group_name: str, ops_database: str = "ops") -> list[dict[str, str]]:
46
60
  """Find tables belonging to a specific inventory group.
47
61
 
48
62
  Returns list of dictionaries with keys: database, table.
@@ -50,7 +64,7 @@ def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
50
64
  """
51
65
  query = f"""
52
66
  SELECT database_name, table_name
53
- FROM ops.table_inventory
67
+ FROM {ops_database}.table_inventory
54
68
  WHERE inventory_group = {utils.quote_value(group_name)}
55
69
  ORDER BY database_name, table_name
56
70
  """
@@ -58,8 +72,49 @@ def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
58
72
  return [{"database": row[0], "table": row[1]} for row in rows]
59
73
 
60
74
 
75
+ def validate_tables_exist(
76
+ db, database: str, tables: list[dict[str, str]], group: str = None
77
+ ) -> None:
78
+ """Validate that tables in the inventory actually exist in the database.
79
+
80
+ Args:
81
+ db: Database connection
82
+ database: Database name to validate tables against
83
+ tables: List of tables with keys: database, table
84
+ group: Optional inventory group name for better error messages
85
+
86
+ Raises:
87
+ InvalidTablesInInventoryError: If any tables don't exist in the database
88
+ """
89
+ if not tables:
90
+ return
91
+
92
+ db_tables = [t for t in tables if t["database"] == database and t["table"] != "*"]
93
+
94
+ if not db_tables:
95
+ return
96
+
97
+ show_tables_query = f"SHOW TABLES FROM {utils.quote_identifier(database)}"
98
+ existing_tables_rows = db.query(show_tables_query)
99
+ existing_tables = {row[0] for row in existing_tables_rows}
100
+
101
+ invalid_tables = []
102
+ for table_entry in db_tables:
103
+ table_name = table_entry["table"]
104
+ if table_name not in existing_tables:
105
+ invalid_tables.append(table_name)
106
+
107
+ if invalid_tables:
108
+ raise exceptions.InvalidTablesInInventoryError(database, invalid_tables, group)
109
+
110
+
61
111
  def find_recent_partitions(
62
- db, database: str, baseline_backup_label: str | None = None, *, group_name: str
112
+ db,
113
+ database: str,
114
+ baseline_backup_label: str | None = None,
115
+ *,
116
+ group_name: str,
117
+ ops_database: str = "ops",
63
118
  ) -> list[dict[str, str]]:
64
119
  """Find partitions updated since baseline for tables in the given inventory group.
65
120
 
@@ -77,7 +132,7 @@ def find_recent_partitions(
77
132
  if baseline_backup_label:
78
133
  baseline_query = f"""
79
134
  SELECT finished_at
80
- FROM ops.backup_history
135
+ FROM {ops_database}.backup_history
81
136
  WHERE label = {utils.quote_value(baseline_backup_label)}
82
137
  AND status = 'FINISHED'
83
138
  """
@@ -86,7 +141,7 @@ def find_recent_partitions(
86
141
  raise exceptions.BackupLabelNotFoundError(baseline_backup_label)
87
142
  baseline_time_raw = baseline_rows[0][0]
88
143
  else:
89
- latest_backup = find_latest_full_backup(db, database)
144
+ latest_backup = find_latest_full_backup(db, database, ops_database)
90
145
  if not latest_backup:
91
146
  raise exceptions.NoFullBackupFoundError(database)
92
147
  baseline_time_raw = latest_backup["finished_at"]
@@ -100,7 +155,7 @@ def find_recent_partitions(
100
155
 
101
156
  baseline_dt = timezone.parse_datetime_with_tz(baseline_time_str, cluster_tz)
102
157
 
103
- group_tables = find_tables_by_group(db, group_name)
158
+ group_tables = find_tables_by_group(db, group_name, ops_database)
104
159
 
105
160
  if not group_tables:
106
161
  return []
@@ -204,7 +259,7 @@ def build_incremental_backup_command(
204
259
 
205
260
 
206
261
  def build_full_backup_command(
207
- db, group_name: str, repository: str, label: str, database: str
262
+ db, group_name: str, repository: str, label: str, database: str, ops_database: str = "ops"
208
263
  ) -> str:
209
264
  """Build BACKUP command for an inventory group.
210
265
 
@@ -212,7 +267,7 @@ def build_full_backup_command(
212
267
  simple BACKUP DATABASE command. Otherwise, generate ON (TABLE ...) list for
213
268
  the specific tables within the database.
214
269
  """
215
- tables = find_tables_by_group(db, group_name)
270
+ tables = find_tables_by_group(db, group_name, ops_database)
216
271
 
217
272
  db_entries = [t for t in tables if t["database"] == database]
218
273
  if not db_entries:
@@ -231,8 +286,10 @@ def build_full_backup_command(
231
286
  ON ({on_clause})"""
232
287
 
233
288
 
234
- def record_backup_partitions(db, label: str, partitions: list[dict[str, str]]) -> None:
235
- """Record partition metadata for a backup in ops.backup_partitions table.
289
+ def record_backup_partitions(
290
+ db, label: str, partitions: list[dict[str, str]], ops_database: str = "ops"
291
+ ) -> None:
292
+ """Record partition metadata for a backup in the backup_partitions table.
236
293
 
237
294
  Args:
238
295
  db: Database connection
@@ -249,7 +306,7 @@ def record_backup_partitions(db, label: str, partitions: list[dict[str, str]]) -
249
306
  key_hash = hashlib.md5(composite_key.encode("utf-8")).hexdigest()
250
307
 
251
308
  db.execute(f"""
252
- INSERT INTO ops.backup_partitions
309
+ INSERT INTO {ops_database}.backup_partitions
253
310
  (key_hash, label, database_name, table_name, partition_name)
254
311
  VALUES ({utils.quote_value(key_hash)}, {utils.quote_value(label)}, {utils.quote_value(partition["database"])}, {utils.quote_value(partition["table"])}, {utils.quote_value(partition["partition_name"])})
255
312
  """)