starrocks-br 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/cli.py +257 -193
- starrocks_br/concurrency.py +50 -50
- starrocks_br/config.py +31 -23
- starrocks_br/db.py +37 -37
- starrocks_br/executor.py +100 -71
- starrocks_br/health.py +1 -6
- starrocks_br/history.py +5 -6
- starrocks_br/labels.py +14 -10
- starrocks_br/planner.py +113 -111
- starrocks_br/repository.py +3 -5
- starrocks_br/restore.py +240 -187
- starrocks_br/schema.py +15 -14
- starrocks_br/timezone.py +28 -29
- starrocks_br/utils.py +86 -0
- starrocks_br-0.4.0.dist-info/METADATA +152 -0
- starrocks_br-0.4.0.dist-info/RECORD +21 -0
- starrocks_br-0.3.0.dist-info/METADATA +0 -456
- starrocks_br-0.3.0.dist-info/RECORD +0 -20
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.4.0.dist-info}/WHEEL +0 -0
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.4.0.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.3.0.dist-info → starrocks_br-0.4.0.dist-info}/top_level.txt +0 -0
starrocks_br/planner.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
from typing import List, Dict, Optional
|
|
2
1
|
import datetime
|
|
3
2
|
import hashlib
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
|
-
from starrocks_br import logger, timezone
|
|
5
|
+
from starrocks_br import logger, timezone, utils
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def find_latest_full_backup(db, database: str) -> Optional[
|
|
8
|
+
def find_latest_full_backup(db, database: str) -> Optional[dict[str, str]]:
|
|
9
9
|
"""Find the latest successful full backup for a database.
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
Args:
|
|
12
12
|
db: Database connection
|
|
13
13
|
database: Database name to search for
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
Returns:
|
|
16
16
|
Dictionary with keys: label, backup_type, finished_at, or None if no full backup found.
|
|
17
17
|
The finished_at value is returned as a string in the cluster timezone format.
|
|
@@ -21,125 +21,126 @@ def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
|
|
|
21
21
|
FROM ops.backup_history
|
|
22
22
|
WHERE backup_type = 'full'
|
|
23
23
|
AND status = 'FINISHED'
|
|
24
|
-
AND label LIKE
|
|
24
|
+
AND label LIKE {utils.quote_value(f"{database}_%")}
|
|
25
25
|
ORDER BY finished_at DESC
|
|
26
26
|
LIMIT 1
|
|
27
27
|
"""
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
rows = db.query(query)
|
|
30
|
-
|
|
30
|
+
|
|
31
31
|
if not rows:
|
|
32
32
|
return None
|
|
33
|
-
|
|
33
|
+
|
|
34
34
|
row = rows[0]
|
|
35
35
|
finished_at = row[2]
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
if isinstance(finished_at, datetime.datetime):
|
|
38
|
-
|
|
39
|
-
finished_at =
|
|
38
|
+
finished_at_normalized = timezone.normalize_datetime_to_tz(finished_at, db.timezone)
|
|
39
|
+
finished_at = finished_at_normalized.strftime("%Y-%m-%d %H:%M:%S")
|
|
40
40
|
elif not isinstance(finished_at, str):
|
|
41
41
|
finished_at = str(finished_at)
|
|
42
|
-
|
|
43
|
-
return {
|
|
44
|
-
"label": row[0],
|
|
45
|
-
"backup_type": row[1],
|
|
46
|
-
"finished_at": finished_at
|
|
47
|
-
}
|
|
48
42
|
|
|
43
|
+
return {"label": row[0], "backup_type": row[1], "finished_at": finished_at}
|
|
49
44
|
|
|
50
|
-
|
|
45
|
+
|
|
46
|
+
def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
|
|
51
47
|
"""Find tables belonging to a specific inventory group.
|
|
52
|
-
|
|
48
|
+
|
|
53
49
|
Returns list of dictionaries with keys: database, table.
|
|
54
50
|
Supports '*' table wildcard which signifies all tables in a database.
|
|
55
51
|
"""
|
|
56
52
|
query = f"""
|
|
57
53
|
SELECT database_name, table_name
|
|
58
54
|
FROM ops.table_inventory
|
|
59
|
-
WHERE inventory_group =
|
|
55
|
+
WHERE inventory_group = {utils.quote_value(group_name)}
|
|
60
56
|
ORDER BY database_name, table_name
|
|
61
57
|
"""
|
|
62
58
|
rows = db.query(query)
|
|
63
|
-
return [
|
|
64
|
-
{"database": row[0], "table": row[1]} for row in rows
|
|
65
|
-
]
|
|
59
|
+
return [{"database": row[0], "table": row[1]} for row in rows]
|
|
66
60
|
|
|
67
61
|
|
|
68
|
-
def find_recent_partitions(
|
|
62
|
+
def find_recent_partitions(
|
|
63
|
+
db, database: str, baseline_backup_label: Optional[str] = None, *, group_name: str
|
|
64
|
+
) -> list[dict[str, str]]:
|
|
69
65
|
"""Find partitions updated since baseline for tables in the given inventory group.
|
|
70
|
-
|
|
66
|
+
|
|
71
67
|
Args:
|
|
72
68
|
db: Database connection
|
|
73
69
|
database: Database name (StarRocks database scope for backup)
|
|
74
70
|
baseline_backup_label: Optional specific backup label to use as baseline.
|
|
75
71
|
group_name: Inventory group whose tables will be considered
|
|
76
|
-
|
|
72
|
+
|
|
77
73
|
Returns list of dictionaries with keys: database, table, partition_name.
|
|
78
74
|
Only partitions of tables within the specified database are returned.
|
|
79
75
|
"""
|
|
80
76
|
cluster_tz = db.timezone
|
|
81
|
-
|
|
77
|
+
|
|
82
78
|
if baseline_backup_label:
|
|
83
79
|
baseline_query = f"""
|
|
84
80
|
SELECT finished_at
|
|
85
81
|
FROM ops.backup_history
|
|
86
|
-
WHERE label =
|
|
82
|
+
WHERE label = {utils.quote_value(baseline_backup_label)}
|
|
87
83
|
AND status = 'FINISHED'
|
|
88
84
|
"""
|
|
89
85
|
baseline_rows = db.query(baseline_query)
|
|
90
86
|
if not baseline_rows:
|
|
91
|
-
raise ValueError(
|
|
87
|
+
raise ValueError(
|
|
88
|
+
f"Baseline backup '{baseline_backup_label}' not found or not successful"
|
|
89
|
+
)
|
|
92
90
|
baseline_time_raw = baseline_rows[0][0]
|
|
93
91
|
else:
|
|
94
92
|
latest_backup = find_latest_full_backup(db, database)
|
|
95
93
|
if not latest_backup:
|
|
96
|
-
raise ValueError(
|
|
97
|
-
|
|
98
|
-
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f"No successful full backup found for database '{database}'. Run a full database backup first."
|
|
96
|
+
)
|
|
97
|
+
baseline_time_raw = latest_backup["finished_at"]
|
|
98
|
+
|
|
99
99
|
if isinstance(baseline_time_raw, datetime.datetime):
|
|
100
100
|
baseline_time_str = baseline_time_raw.strftime("%Y-%m-%d %H:%M:%S")
|
|
101
101
|
elif isinstance(baseline_time_raw, str):
|
|
102
102
|
baseline_time_str = baseline_time_raw
|
|
103
103
|
else:
|
|
104
104
|
baseline_time_str = str(baseline_time_raw)
|
|
105
|
-
|
|
105
|
+
|
|
106
106
|
baseline_dt = timezone.parse_datetime_with_tz(baseline_time_str, cluster_tz)
|
|
107
|
-
|
|
107
|
+
|
|
108
108
|
group_tables = find_tables_by_group(db, group_name)
|
|
109
109
|
|
|
110
110
|
if not group_tables:
|
|
111
111
|
return []
|
|
112
112
|
|
|
113
|
-
db_group_tables = [t for t in group_tables if t[
|
|
113
|
+
db_group_tables = [t for t in group_tables if t["database"] == database]
|
|
114
114
|
|
|
115
115
|
if not db_group_tables:
|
|
116
116
|
return []
|
|
117
|
-
|
|
117
|
+
|
|
118
118
|
concrete_tables = []
|
|
119
119
|
for table_entry in db_group_tables:
|
|
120
|
-
if table_entry[
|
|
121
|
-
show_tables_query =
|
|
120
|
+
if table_entry["table"] == "*":
|
|
121
|
+
show_tables_query = (
|
|
122
|
+
f"SHOW TABLES FROM {utils.quote_identifier(table_entry['database'])}"
|
|
123
|
+
)
|
|
122
124
|
tables_rows = db.query(show_tables_query)
|
|
123
125
|
for row in tables_rows:
|
|
124
|
-
concrete_tables.append({
|
|
125
|
-
'database': table_entry['database'],
|
|
126
|
-
'table': row[0]
|
|
127
|
-
})
|
|
126
|
+
concrete_tables.append({"database": table_entry["database"], "table": row[0]})
|
|
128
127
|
else:
|
|
129
128
|
concrete_tables.append(table_entry)
|
|
130
|
-
|
|
129
|
+
|
|
131
130
|
recent_partitions = []
|
|
132
131
|
for table_entry in concrete_tables:
|
|
133
|
-
db_name = table_entry[
|
|
134
|
-
table_name = table_entry[
|
|
135
|
-
|
|
136
|
-
show_partitions_query =
|
|
132
|
+
db_name = table_entry["database"]
|
|
133
|
+
table_name = table_entry["table"]
|
|
134
|
+
|
|
135
|
+
show_partitions_query = (
|
|
136
|
+
f"SHOW PARTITIONS FROM {utils.build_qualified_table_name(db_name, table_name)}"
|
|
137
|
+
)
|
|
137
138
|
try:
|
|
138
139
|
partition_rows = db.query(show_partitions_query)
|
|
139
140
|
except Exception as e:
|
|
140
141
|
logger.error(f"Error showing partitions for table {db_name}.{table_name}: {e}")
|
|
141
142
|
continue
|
|
142
|
-
|
|
143
|
+
|
|
143
144
|
for row in partition_rows:
|
|
144
145
|
# FOR SHARED NOTHING CLUSTER:
|
|
145
146
|
# PartitionId, PartitionName, VisibleVersion, VisibleVersionTime, VisibleVersionHash, State, PartitionKey, Range, DistributionKey, Buckets, ReplicationNum, StorageMedium, CooldownTime, LastConsistencyCheckTime, DataSize, StorageSize, IsInMemory, RowCount, DataVersion, VersionEpoch, VersionTxnType
|
|
@@ -152,86 +153,90 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
|
|
|
152
153
|
visible_version_time_str = visible_version_time
|
|
153
154
|
else:
|
|
154
155
|
visible_version_time_str = str(visible_version_time)
|
|
155
|
-
|
|
156
|
-
visible_version_dt = timezone.parse_datetime_with_tz(
|
|
157
|
-
|
|
156
|
+
|
|
157
|
+
visible_version_dt = timezone.parse_datetime_with_tz(
|
|
158
|
+
visible_version_time_str, cluster_tz
|
|
159
|
+
)
|
|
160
|
+
|
|
158
161
|
if visible_version_dt > baseline_dt:
|
|
159
|
-
recent_partitions.append(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
})
|
|
164
|
-
|
|
162
|
+
recent_partitions.append(
|
|
163
|
+
{"database": db_name, "table": table_name, "partition_name": partition_name}
|
|
164
|
+
)
|
|
165
|
+
|
|
165
166
|
return recent_partitions
|
|
166
167
|
|
|
167
168
|
|
|
168
|
-
def build_incremental_backup_command(
|
|
169
|
+
def build_incremental_backup_command(
|
|
170
|
+
partitions: list[dict[str, str]], repository: str, label: str, database: str
|
|
171
|
+
) -> str:
|
|
169
172
|
"""Build BACKUP command for incremental backup of specific partitions.
|
|
170
|
-
|
|
173
|
+
|
|
171
174
|
Args:
|
|
172
175
|
partitions: List of partitions to backup
|
|
173
176
|
repository: Repository name
|
|
174
177
|
label: Backup label
|
|
175
178
|
database: Database name (StarRocks requires BACKUP to be database-specific)
|
|
176
|
-
|
|
179
|
+
|
|
177
180
|
Note: Filters partitions to only include those from the specified database.
|
|
178
181
|
"""
|
|
179
182
|
if not partitions:
|
|
180
183
|
return ""
|
|
181
|
-
|
|
182
|
-
db_partitions = [p for p in partitions if p[
|
|
183
|
-
|
|
184
|
+
|
|
185
|
+
db_partitions = [p for p in partitions if p["database"] == database]
|
|
186
|
+
|
|
184
187
|
if not db_partitions:
|
|
185
188
|
return ""
|
|
186
|
-
|
|
189
|
+
|
|
187
190
|
table_partitions = {}
|
|
188
191
|
for partition in db_partitions:
|
|
189
|
-
table_name = partition[
|
|
192
|
+
table_name = partition["table"]
|
|
190
193
|
if table_name not in table_partitions:
|
|
191
194
|
table_partitions[table_name] = []
|
|
192
|
-
table_partitions[table_name].append(partition[
|
|
193
|
-
|
|
195
|
+
table_partitions[table_name].append(partition["partition_name"])
|
|
196
|
+
|
|
194
197
|
on_clauses = []
|
|
195
198
|
for table, parts in table_partitions.items():
|
|
196
|
-
partitions_str = ", ".join(parts)
|
|
197
|
-
on_clauses.append(f"TABLE {table} PARTITION ({partitions_str})")
|
|
198
|
-
|
|
199
|
+
partitions_str = ", ".join(utils.quote_identifier(p) for p in parts)
|
|
200
|
+
on_clauses.append(f"TABLE {utils.quote_identifier(table)} PARTITION ({partitions_str})")
|
|
201
|
+
|
|
199
202
|
on_clause = ",\n ".join(on_clauses)
|
|
200
|
-
|
|
201
|
-
command = f"""BACKUP DATABASE {database} SNAPSHOT {label}
|
|
202
|
-
TO {repository}
|
|
203
|
+
|
|
204
|
+
command = f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
|
|
205
|
+
TO {utils.quote_identifier(repository)}
|
|
203
206
|
ON ({on_clause})"""
|
|
204
|
-
|
|
207
|
+
|
|
205
208
|
return command
|
|
206
209
|
|
|
207
210
|
|
|
208
|
-
def build_full_backup_command(
|
|
211
|
+
def build_full_backup_command(
|
|
212
|
+
db, group_name: str, repository: str, label: str, database: str
|
|
213
|
+
) -> str:
|
|
209
214
|
"""Build BACKUP command for an inventory group.
|
|
210
|
-
|
|
215
|
+
|
|
211
216
|
If the group contains '*' for any entry in the target database, generate a
|
|
212
217
|
simple BACKUP DATABASE command. Otherwise, generate ON (TABLE ...) list for
|
|
213
218
|
the specific tables within the database.
|
|
214
219
|
"""
|
|
215
220
|
tables = find_tables_by_group(db, group_name)
|
|
216
221
|
|
|
217
|
-
db_entries = [t for t in tables if t[
|
|
222
|
+
db_entries = [t for t in tables if t["database"] == database]
|
|
218
223
|
if not db_entries:
|
|
219
224
|
return ""
|
|
220
225
|
|
|
221
|
-
if any(t[
|
|
222
|
-
return f"""BACKUP DATABASE {database} SNAPSHOT {label}
|
|
223
|
-
TO {repository}"""
|
|
226
|
+
if any(t["table"] == "*" for t in db_entries):
|
|
227
|
+
return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
|
|
228
|
+
TO {utils.quote_identifier(repository)}"""
|
|
224
229
|
|
|
225
230
|
on_clauses = []
|
|
226
231
|
for t in db_entries:
|
|
227
|
-
on_clauses.append(f"TABLE {t['table']}")
|
|
232
|
+
on_clauses.append(f"TABLE {utils.quote_identifier(t['table'])}")
|
|
228
233
|
on_clause = ",\n ".join(on_clauses)
|
|
229
|
-
return f"""BACKUP DATABASE {database} SNAPSHOT {label}
|
|
230
|
-
TO {repository}
|
|
234
|
+
return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
|
|
235
|
+
TO {utils.quote_identifier(repository)}
|
|
231
236
|
ON ({on_clause})"""
|
|
232
237
|
|
|
233
238
|
|
|
234
|
-
def record_backup_partitions(db, label: str, partitions:
|
|
239
|
+
def record_backup_partitions(db, label: str, partitions: list[dict[str, str]]) -> None:
|
|
235
240
|
"""Record partition metadata for a backup in ops.backup_partitions table.
|
|
236
241
|
|
|
237
242
|
Args:
|
|
@@ -243,62 +248,59 @@ def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -
|
|
|
243
248
|
return
|
|
244
249
|
|
|
245
250
|
for partition in partitions:
|
|
246
|
-
composite_key =
|
|
247
|
-
|
|
251
|
+
composite_key = (
|
|
252
|
+
f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
|
|
253
|
+
)
|
|
254
|
+
key_hash = hashlib.md5(composite_key.encode("utf-8")).hexdigest()
|
|
248
255
|
|
|
249
256
|
db.execute(f"""
|
|
250
257
|
INSERT INTO ops.backup_partitions
|
|
251
258
|
(key_hash, label, database_name, table_name, partition_name)
|
|
252
|
-
VALUES (
|
|
259
|
+
VALUES ({utils.quote_value(key_hash)}, {utils.quote_value(label)}, {utils.quote_value(partition["database"])}, {utils.quote_value(partition["table"])}, {utils.quote_value(partition["partition_name"])})
|
|
253
260
|
""")
|
|
254
261
|
|
|
255
262
|
|
|
256
|
-
def get_all_partitions_for_tables(
|
|
263
|
+
def get_all_partitions_for_tables(
|
|
264
|
+
db, database: str, tables: list[dict[str, str]]
|
|
265
|
+
) -> list[dict[str, str]]:
|
|
257
266
|
"""Get all existing partitions for the specified tables.
|
|
258
|
-
|
|
267
|
+
|
|
259
268
|
Args:
|
|
260
269
|
db: Database connection
|
|
261
270
|
database: Database name
|
|
262
271
|
tables: List of tables with keys: database, table
|
|
263
|
-
|
|
272
|
+
|
|
264
273
|
Returns:
|
|
265
274
|
List of partitions with keys: database, table, partition_name
|
|
266
275
|
"""
|
|
267
276
|
if not tables:
|
|
268
277
|
return []
|
|
269
|
-
|
|
270
|
-
db_tables = [t for t in tables if t[
|
|
278
|
+
|
|
279
|
+
db_tables = [t for t in tables if t["database"] == database]
|
|
271
280
|
if not db_tables:
|
|
272
281
|
return []
|
|
273
|
-
|
|
274
|
-
where_conditions = [f"DB_NAME =
|
|
275
|
-
|
|
282
|
+
|
|
283
|
+
where_conditions = [f"DB_NAME = {utils.quote_value(database)}", "PARTITION_NAME IS NOT NULL"]
|
|
284
|
+
|
|
276
285
|
table_conditions = []
|
|
277
286
|
for table in db_tables:
|
|
278
|
-
if table[
|
|
287
|
+
if table["table"] == "*":
|
|
279
288
|
pass
|
|
280
289
|
else:
|
|
281
|
-
table_conditions.append(f"TABLE_NAME =
|
|
282
|
-
|
|
290
|
+
table_conditions.append(f"TABLE_NAME = {utils.quote_value(table['table'])}")
|
|
291
|
+
|
|
283
292
|
if table_conditions:
|
|
284
293
|
where_conditions.append("(" + " OR ".join(table_conditions) + ")")
|
|
285
|
-
|
|
294
|
+
|
|
286
295
|
where_clause = " AND ".join(where_conditions)
|
|
287
|
-
|
|
296
|
+
|
|
288
297
|
query = f"""
|
|
289
298
|
SELECT DB_NAME, TABLE_NAME, PARTITION_NAME
|
|
290
299
|
FROM information_schema.partitions_meta
|
|
291
300
|
WHERE {where_clause}
|
|
292
301
|
ORDER BY TABLE_NAME, PARTITION_NAME
|
|
293
302
|
"""
|
|
294
|
-
|
|
303
|
+
|
|
295
304
|
rows = db.query(query)
|
|
296
|
-
|
|
297
|
-
return [
|
|
298
|
-
{
|
|
299
|
-
"database": row[0],
|
|
300
|
-
"table": row[1],
|
|
301
|
-
"partition_name": row[2]
|
|
302
|
-
}
|
|
303
|
-
for row in rows
|
|
304
|
-
]
|
|
305
|
+
|
|
306
|
+
return [{"database": row[0], "table": row[1], "partition_name": row[2]} for row in rows]
|
starrocks_br/repository.py
CHANGED
|
@@ -3,11 +3,11 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
def ensure_repository(db, name: str) -> None:
|
|
5
5
|
"""Verify that the specified repository exists and is accessible.
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
Args:
|
|
8
8
|
db: Database connection
|
|
9
9
|
name: Repository name to verify
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
Raises:
|
|
12
12
|
RuntimeError: If repository doesn't exist or has errors
|
|
13
13
|
"""
|
|
@@ -18,7 +18,7 @@ def ensure_repository(db, name: str) -> None:
|
|
|
18
18
|
f" CREATE REPOSITORY {name} WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
|
|
19
19
|
f"For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
|
|
20
20
|
)
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
# SHOW REPOSITORIES returns: RepoId, RepoName, CreateTime, IsReadOnly, Location, Broker, ErrMsg
|
|
23
23
|
err_msg = existing[6]
|
|
24
24
|
if err_msg and str(err_msg).strip().upper() not in {"", "NULL", "NONE"}:
|
|
@@ -32,5 +32,3 @@ def _find_repository(db, name: str):
|
|
|
32
32
|
if row and row[1] == name:
|
|
33
33
|
return row
|
|
34
34
|
return None
|
|
35
|
-
|
|
36
|
-
|