starrocks-br 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/cli.py +307 -217
- starrocks_br/concurrency.py +50 -50
- starrocks_br/config.py +31 -23
- starrocks_br/db.py +38 -38
- starrocks_br/error_handler.py +265 -0
- starrocks_br/exceptions.py +93 -0
- starrocks_br/executor.py +102 -73
- starrocks_br/health.py +1 -6
- starrocks_br/history.py +5 -8
- starrocks_br/labels.py +14 -10
- starrocks_br/logger.py +45 -15
- starrocks_br/planner.py +112 -111
- starrocks_br/repository.py +3 -5
- starrocks_br/restore.py +241 -191
- starrocks_br/schema.py +15 -14
- starrocks_br/timezone.py +29 -31
- starrocks_br/utils.py +86 -0
- starrocks_br-0.5.0.dist-info/METADATA +153 -0
- starrocks_br-0.5.0.dist-info/RECORD +23 -0
- starrocks_br-0.3.0.dist-info/METADATA +0 -456
- starrocks_br-0.3.0.dist-info/RECORD +0 -20
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.5.0.dist-info}/WHEEL +0 -0
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.5.0.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.5.0.dist-info}/top_level.txt +0 -0
starrocks_br/planner.py
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
|
-
from typing import List, Dict, Optional
|
|
2
1
|
import datetime
|
|
3
2
|
import hashlib
|
|
4
3
|
|
|
5
|
-
from starrocks_br import logger, timezone
|
|
4
|
+
from starrocks_br import logger, timezone, utils
|
|
6
5
|
|
|
7
6
|
|
|
8
|
-
def find_latest_full_backup(db, database: str) ->
|
|
7
|
+
def find_latest_full_backup(db, database: str) -> dict[str, str] | None:
|
|
9
8
|
"""Find the latest successful full backup for a database.
|
|
10
|
-
|
|
9
|
+
|
|
11
10
|
Args:
|
|
12
11
|
db: Database connection
|
|
13
12
|
database: Database name to search for
|
|
14
|
-
|
|
13
|
+
|
|
15
14
|
Returns:
|
|
16
15
|
Dictionary with keys: label, backup_type, finished_at, or None if no full backup found.
|
|
17
16
|
The finished_at value is returned as a string in the cluster timezone format.
|
|
@@ -21,125 +20,126 @@ def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
|
|
|
21
20
|
FROM ops.backup_history
|
|
22
21
|
WHERE backup_type = 'full'
|
|
23
22
|
AND status = 'FINISHED'
|
|
24
|
-
AND label LIKE
|
|
23
|
+
AND label LIKE {utils.quote_value(f"{database}_%")}
|
|
25
24
|
ORDER BY finished_at DESC
|
|
26
25
|
LIMIT 1
|
|
27
26
|
"""
|
|
28
|
-
|
|
27
|
+
|
|
29
28
|
rows = db.query(query)
|
|
30
|
-
|
|
29
|
+
|
|
31
30
|
if not rows:
|
|
32
31
|
return None
|
|
33
|
-
|
|
32
|
+
|
|
34
33
|
row = rows[0]
|
|
35
34
|
finished_at = row[2]
|
|
36
|
-
|
|
35
|
+
|
|
37
36
|
if isinstance(finished_at, datetime.datetime):
|
|
38
|
-
|
|
39
|
-
finished_at =
|
|
37
|
+
finished_at_normalized = timezone.normalize_datetime_to_tz(finished_at, db.timezone)
|
|
38
|
+
finished_at = finished_at_normalized.strftime("%Y-%m-%d %H:%M:%S")
|
|
40
39
|
elif not isinstance(finished_at, str):
|
|
41
40
|
finished_at = str(finished_at)
|
|
42
|
-
|
|
43
|
-
return {
|
|
44
|
-
"label": row[0],
|
|
45
|
-
"backup_type": row[1],
|
|
46
|
-
"finished_at": finished_at
|
|
47
|
-
}
|
|
48
41
|
|
|
42
|
+
return {"label": row[0], "backup_type": row[1], "finished_at": finished_at}
|
|
49
43
|
|
|
50
|
-
|
|
44
|
+
|
|
45
|
+
def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
|
|
51
46
|
"""Find tables belonging to a specific inventory group.
|
|
52
|
-
|
|
47
|
+
|
|
53
48
|
Returns list of dictionaries with keys: database, table.
|
|
54
49
|
Supports '*' table wildcard which signifies all tables in a database.
|
|
55
50
|
"""
|
|
56
51
|
query = f"""
|
|
57
52
|
SELECT database_name, table_name
|
|
58
53
|
FROM ops.table_inventory
|
|
59
|
-
WHERE inventory_group =
|
|
54
|
+
WHERE inventory_group = {utils.quote_value(group_name)}
|
|
60
55
|
ORDER BY database_name, table_name
|
|
61
56
|
"""
|
|
62
57
|
rows = db.query(query)
|
|
63
|
-
return [
|
|
64
|
-
{"database": row[0], "table": row[1]} for row in rows
|
|
65
|
-
]
|
|
58
|
+
return [{"database": row[0], "table": row[1]} for row in rows]
|
|
66
59
|
|
|
67
60
|
|
|
68
|
-
def find_recent_partitions(
|
|
61
|
+
def find_recent_partitions(
|
|
62
|
+
db, database: str, baseline_backup_label: str | None = None, *, group_name: str
|
|
63
|
+
) -> list[dict[str, str]]:
|
|
69
64
|
"""Find partitions updated since baseline for tables in the given inventory group.
|
|
70
|
-
|
|
65
|
+
|
|
71
66
|
Args:
|
|
72
67
|
db: Database connection
|
|
73
68
|
database: Database name (StarRocks database scope for backup)
|
|
74
69
|
baseline_backup_label: Optional specific backup label to use as baseline.
|
|
75
70
|
group_name: Inventory group whose tables will be considered
|
|
76
|
-
|
|
71
|
+
|
|
77
72
|
Returns list of dictionaries with keys: database, table, partition_name.
|
|
78
73
|
Only partitions of tables within the specified database are returned.
|
|
79
74
|
"""
|
|
80
75
|
cluster_tz = db.timezone
|
|
81
|
-
|
|
76
|
+
|
|
82
77
|
if baseline_backup_label:
|
|
83
78
|
baseline_query = f"""
|
|
84
79
|
SELECT finished_at
|
|
85
80
|
FROM ops.backup_history
|
|
86
|
-
WHERE label =
|
|
81
|
+
WHERE label = {utils.quote_value(baseline_backup_label)}
|
|
87
82
|
AND status = 'FINISHED'
|
|
88
83
|
"""
|
|
89
84
|
baseline_rows = db.query(baseline_query)
|
|
90
85
|
if not baseline_rows:
|
|
91
|
-
raise ValueError(
|
|
86
|
+
raise ValueError(
|
|
87
|
+
f"Baseline backup '{baseline_backup_label}' not found or not successful"
|
|
88
|
+
)
|
|
92
89
|
baseline_time_raw = baseline_rows[0][0]
|
|
93
90
|
else:
|
|
94
91
|
latest_backup = find_latest_full_backup(db, database)
|
|
95
92
|
if not latest_backup:
|
|
96
|
-
raise ValueError(
|
|
97
|
-
|
|
98
|
-
|
|
93
|
+
raise ValueError(
|
|
94
|
+
f"No successful full backup found for database '{database}'. Run a full database backup first."
|
|
95
|
+
)
|
|
96
|
+
baseline_time_raw = latest_backup["finished_at"]
|
|
97
|
+
|
|
99
98
|
if isinstance(baseline_time_raw, datetime.datetime):
|
|
100
99
|
baseline_time_str = baseline_time_raw.strftime("%Y-%m-%d %H:%M:%S")
|
|
101
100
|
elif isinstance(baseline_time_raw, str):
|
|
102
101
|
baseline_time_str = baseline_time_raw
|
|
103
102
|
else:
|
|
104
103
|
baseline_time_str = str(baseline_time_raw)
|
|
105
|
-
|
|
104
|
+
|
|
106
105
|
baseline_dt = timezone.parse_datetime_with_tz(baseline_time_str, cluster_tz)
|
|
107
|
-
|
|
106
|
+
|
|
108
107
|
group_tables = find_tables_by_group(db, group_name)
|
|
109
108
|
|
|
110
109
|
if not group_tables:
|
|
111
110
|
return []
|
|
112
111
|
|
|
113
|
-
db_group_tables = [t for t in group_tables if t[
|
|
112
|
+
db_group_tables = [t for t in group_tables if t["database"] == database]
|
|
114
113
|
|
|
115
114
|
if not db_group_tables:
|
|
116
115
|
return []
|
|
117
|
-
|
|
116
|
+
|
|
118
117
|
concrete_tables = []
|
|
119
118
|
for table_entry in db_group_tables:
|
|
120
|
-
if table_entry[
|
|
121
|
-
show_tables_query =
|
|
119
|
+
if table_entry["table"] == "*":
|
|
120
|
+
show_tables_query = (
|
|
121
|
+
f"SHOW TABLES FROM {utils.quote_identifier(table_entry['database'])}"
|
|
122
|
+
)
|
|
122
123
|
tables_rows = db.query(show_tables_query)
|
|
123
124
|
for row in tables_rows:
|
|
124
|
-
concrete_tables.append({
|
|
125
|
-
'database': table_entry['database'],
|
|
126
|
-
'table': row[0]
|
|
127
|
-
})
|
|
125
|
+
concrete_tables.append({"database": table_entry["database"], "table": row[0]})
|
|
128
126
|
else:
|
|
129
127
|
concrete_tables.append(table_entry)
|
|
130
|
-
|
|
128
|
+
|
|
131
129
|
recent_partitions = []
|
|
132
130
|
for table_entry in concrete_tables:
|
|
133
|
-
db_name = table_entry[
|
|
134
|
-
table_name = table_entry[
|
|
135
|
-
|
|
136
|
-
show_partitions_query =
|
|
131
|
+
db_name = table_entry["database"]
|
|
132
|
+
table_name = table_entry["table"]
|
|
133
|
+
|
|
134
|
+
show_partitions_query = (
|
|
135
|
+
f"SHOW PARTITIONS FROM {utils.build_qualified_table_name(db_name, table_name)}"
|
|
136
|
+
)
|
|
137
137
|
try:
|
|
138
138
|
partition_rows = db.query(show_partitions_query)
|
|
139
139
|
except Exception as e:
|
|
140
140
|
logger.error(f"Error showing partitions for table {db_name}.{table_name}: {e}")
|
|
141
141
|
continue
|
|
142
|
-
|
|
142
|
+
|
|
143
143
|
for row in partition_rows:
|
|
144
144
|
# FOR SHARED NOTHING CLUSTER:
|
|
145
145
|
# PartitionId, PartitionName, VisibleVersion, VisibleVersionTime, VisibleVersionHash, State, PartitionKey, Range, DistributionKey, Buckets, ReplicationNum, StorageMedium, CooldownTime, LastConsistencyCheckTime, DataSize, StorageSize, IsInMemory, RowCount, DataVersion, VersionEpoch, VersionTxnType
|
|
@@ -152,86 +152,90 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
|
|
|
152
152
|
visible_version_time_str = visible_version_time
|
|
153
153
|
else:
|
|
154
154
|
visible_version_time_str = str(visible_version_time)
|
|
155
|
-
|
|
156
|
-
visible_version_dt = timezone.parse_datetime_with_tz(
|
|
157
|
-
|
|
155
|
+
|
|
156
|
+
visible_version_dt = timezone.parse_datetime_with_tz(
|
|
157
|
+
visible_version_time_str, cluster_tz
|
|
158
|
+
)
|
|
159
|
+
|
|
158
160
|
if visible_version_dt > baseline_dt:
|
|
159
|
-
recent_partitions.append(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
})
|
|
164
|
-
|
|
161
|
+
recent_partitions.append(
|
|
162
|
+
{"database": db_name, "table": table_name, "partition_name": partition_name}
|
|
163
|
+
)
|
|
164
|
+
|
|
165
165
|
return recent_partitions
|
|
166
166
|
|
|
167
167
|
|
|
168
|
-
def build_incremental_backup_command(
|
|
168
|
+
def build_incremental_backup_command(
|
|
169
|
+
partitions: list[dict[str, str]], repository: str, label: str, database: str
|
|
170
|
+
) -> str:
|
|
169
171
|
"""Build BACKUP command for incremental backup of specific partitions.
|
|
170
|
-
|
|
172
|
+
|
|
171
173
|
Args:
|
|
172
174
|
partitions: List of partitions to backup
|
|
173
175
|
repository: Repository name
|
|
174
176
|
label: Backup label
|
|
175
177
|
database: Database name (StarRocks requires BACKUP to be database-specific)
|
|
176
|
-
|
|
178
|
+
|
|
177
179
|
Note: Filters partitions to only include those from the specified database.
|
|
178
180
|
"""
|
|
179
181
|
if not partitions:
|
|
180
182
|
return ""
|
|
181
|
-
|
|
182
|
-
db_partitions = [p for p in partitions if p[
|
|
183
|
-
|
|
183
|
+
|
|
184
|
+
db_partitions = [p for p in partitions if p["database"] == database]
|
|
185
|
+
|
|
184
186
|
if not db_partitions:
|
|
185
187
|
return ""
|
|
186
|
-
|
|
188
|
+
|
|
187
189
|
table_partitions = {}
|
|
188
190
|
for partition in db_partitions:
|
|
189
|
-
table_name = partition[
|
|
191
|
+
table_name = partition["table"]
|
|
190
192
|
if table_name not in table_partitions:
|
|
191
193
|
table_partitions[table_name] = []
|
|
192
|
-
table_partitions[table_name].append(partition[
|
|
193
|
-
|
|
194
|
+
table_partitions[table_name].append(partition["partition_name"])
|
|
195
|
+
|
|
194
196
|
on_clauses = []
|
|
195
197
|
for table, parts in table_partitions.items():
|
|
196
|
-
partitions_str = ", ".join(parts)
|
|
197
|
-
on_clauses.append(f"TABLE {table} PARTITION ({partitions_str})")
|
|
198
|
-
|
|
198
|
+
partitions_str = ", ".join(utils.quote_identifier(p) for p in parts)
|
|
199
|
+
on_clauses.append(f"TABLE {utils.quote_identifier(table)} PARTITION ({partitions_str})")
|
|
200
|
+
|
|
199
201
|
on_clause = ",\n ".join(on_clauses)
|
|
200
|
-
|
|
201
|
-
command = f"""BACKUP DATABASE {database} SNAPSHOT {label}
|
|
202
|
-
TO {repository}
|
|
202
|
+
|
|
203
|
+
command = f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
|
|
204
|
+
TO {utils.quote_identifier(repository)}
|
|
203
205
|
ON ({on_clause})"""
|
|
204
|
-
|
|
206
|
+
|
|
205
207
|
return command
|
|
206
208
|
|
|
207
209
|
|
|
208
|
-
def build_full_backup_command(
|
|
210
|
+
def build_full_backup_command(
|
|
211
|
+
db, group_name: str, repository: str, label: str, database: str
|
|
212
|
+
) -> str:
|
|
209
213
|
"""Build BACKUP command for an inventory group.
|
|
210
|
-
|
|
214
|
+
|
|
211
215
|
If the group contains '*' for any entry in the target database, generate a
|
|
212
216
|
simple BACKUP DATABASE command. Otherwise, generate ON (TABLE ...) list for
|
|
213
217
|
the specific tables within the database.
|
|
214
218
|
"""
|
|
215
219
|
tables = find_tables_by_group(db, group_name)
|
|
216
220
|
|
|
217
|
-
db_entries = [t for t in tables if t[
|
|
221
|
+
db_entries = [t for t in tables if t["database"] == database]
|
|
218
222
|
if not db_entries:
|
|
219
223
|
return ""
|
|
220
224
|
|
|
221
|
-
if any(t[
|
|
222
|
-
return f"""BACKUP DATABASE {database} SNAPSHOT {label}
|
|
223
|
-
TO {repository}"""
|
|
225
|
+
if any(t["table"] == "*" for t in db_entries):
|
|
226
|
+
return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
|
|
227
|
+
TO {utils.quote_identifier(repository)}"""
|
|
224
228
|
|
|
225
229
|
on_clauses = []
|
|
226
230
|
for t in db_entries:
|
|
227
|
-
on_clauses.append(f"TABLE {t['table']}")
|
|
231
|
+
on_clauses.append(f"TABLE {utils.quote_identifier(t['table'])}")
|
|
228
232
|
on_clause = ",\n ".join(on_clauses)
|
|
229
|
-
return f"""BACKUP DATABASE {database} SNAPSHOT {label}
|
|
230
|
-
TO {repository}
|
|
233
|
+
return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
|
|
234
|
+
TO {utils.quote_identifier(repository)}
|
|
231
235
|
ON ({on_clause})"""
|
|
232
236
|
|
|
233
237
|
|
|
234
|
-
def record_backup_partitions(db, label: str, partitions:
|
|
238
|
+
def record_backup_partitions(db, label: str, partitions: list[dict[str, str]]) -> None:
|
|
235
239
|
"""Record partition metadata for a backup in ops.backup_partitions table.
|
|
236
240
|
|
|
237
241
|
Args:
|
|
@@ -243,62 +247,59 @@ def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -
|
|
|
243
247
|
return
|
|
244
248
|
|
|
245
249
|
for partition in partitions:
|
|
246
|
-
composite_key =
|
|
247
|
-
|
|
250
|
+
composite_key = (
|
|
251
|
+
f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
|
|
252
|
+
)
|
|
253
|
+
key_hash = hashlib.md5(composite_key.encode("utf-8")).hexdigest()
|
|
248
254
|
|
|
249
255
|
db.execute(f"""
|
|
250
256
|
INSERT INTO ops.backup_partitions
|
|
251
257
|
(key_hash, label, database_name, table_name, partition_name)
|
|
252
|
-
VALUES (
|
|
258
|
+
VALUES ({utils.quote_value(key_hash)}, {utils.quote_value(label)}, {utils.quote_value(partition["database"])}, {utils.quote_value(partition["table"])}, {utils.quote_value(partition["partition_name"])})
|
|
253
259
|
""")
|
|
254
260
|
|
|
255
261
|
|
|
256
|
-
def get_all_partitions_for_tables(
|
|
262
|
+
def get_all_partitions_for_tables(
|
|
263
|
+
db, database: str, tables: list[dict[str, str]]
|
|
264
|
+
) -> list[dict[str, str]]:
|
|
257
265
|
"""Get all existing partitions for the specified tables.
|
|
258
|
-
|
|
266
|
+
|
|
259
267
|
Args:
|
|
260
268
|
db: Database connection
|
|
261
269
|
database: Database name
|
|
262
270
|
tables: List of tables with keys: database, table
|
|
263
|
-
|
|
271
|
+
|
|
264
272
|
Returns:
|
|
265
273
|
List of partitions with keys: database, table, partition_name
|
|
266
274
|
"""
|
|
267
275
|
if not tables:
|
|
268
276
|
return []
|
|
269
|
-
|
|
270
|
-
db_tables = [t for t in tables if t[
|
|
277
|
+
|
|
278
|
+
db_tables = [t for t in tables if t["database"] == database]
|
|
271
279
|
if not db_tables:
|
|
272
280
|
return []
|
|
273
|
-
|
|
274
|
-
where_conditions = [f"DB_NAME =
|
|
275
|
-
|
|
281
|
+
|
|
282
|
+
where_conditions = [f"DB_NAME = {utils.quote_value(database)}", "PARTITION_NAME IS NOT NULL"]
|
|
283
|
+
|
|
276
284
|
table_conditions = []
|
|
277
285
|
for table in db_tables:
|
|
278
|
-
if table[
|
|
286
|
+
if table["table"] == "*":
|
|
279
287
|
pass
|
|
280
288
|
else:
|
|
281
|
-
table_conditions.append(f"TABLE_NAME =
|
|
282
|
-
|
|
289
|
+
table_conditions.append(f"TABLE_NAME = {utils.quote_value(table['table'])}")
|
|
290
|
+
|
|
283
291
|
if table_conditions:
|
|
284
292
|
where_conditions.append("(" + " OR ".join(table_conditions) + ")")
|
|
285
|
-
|
|
293
|
+
|
|
286
294
|
where_clause = " AND ".join(where_conditions)
|
|
287
|
-
|
|
295
|
+
|
|
288
296
|
query = f"""
|
|
289
297
|
SELECT DB_NAME, TABLE_NAME, PARTITION_NAME
|
|
290
298
|
FROM information_schema.partitions_meta
|
|
291
299
|
WHERE {where_clause}
|
|
292
300
|
ORDER BY TABLE_NAME, PARTITION_NAME
|
|
293
301
|
"""
|
|
294
|
-
|
|
302
|
+
|
|
295
303
|
rows = db.query(query)
|
|
296
|
-
|
|
297
|
-
return [
|
|
298
|
-
{
|
|
299
|
-
"database": row[0],
|
|
300
|
-
"table": row[1],
|
|
301
|
-
"partition_name": row[2]
|
|
302
|
-
}
|
|
303
|
-
for row in rows
|
|
304
|
-
]
|
|
304
|
+
|
|
305
|
+
return [{"database": row[0], "table": row[1], "partition_name": row[2]} for row in rows]
|
starrocks_br/repository.py
CHANGED
|
@@ -3,11 +3,11 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
def ensure_repository(db, name: str) -> None:
|
|
5
5
|
"""Verify that the specified repository exists and is accessible.
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
Args:
|
|
8
8
|
db: Database connection
|
|
9
9
|
name: Repository name to verify
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
Raises:
|
|
12
12
|
RuntimeError: If repository doesn't exist or has errors
|
|
13
13
|
"""
|
|
@@ -18,7 +18,7 @@ def ensure_repository(db, name: str) -> None:
|
|
|
18
18
|
f" CREATE REPOSITORY {name} WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
|
|
19
19
|
f"For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
|
|
20
20
|
)
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
# SHOW REPOSITORIES returns: RepoId, RepoName, CreateTime, IsReadOnly, Location, Broker, ErrMsg
|
|
23
23
|
err_msg = existing[6]
|
|
24
24
|
if err_msg and str(err_msg).strip().upper() not in {"", "NULL", "NONE"}:
|
|
@@ -32,5 +32,3 @@ def _find_repository(db, name: str):
|
|
|
32
32
|
if row and row[1] == name:
|
|
33
33
|
return row
|
|
34
34
|
return None
|
|
35
|
-
|
|
36
|
-
|