starrocks-br 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/__init__.py +1 -0
- starrocks_br/cli.py +385 -0
- starrocks_br/concurrency.py +177 -0
- starrocks_br/config.py +41 -0
- starrocks_br/db.py +88 -0
- starrocks_br/executor.py +245 -0
- starrocks_br/health.py +34 -0
- starrocks_br/history.py +93 -0
- starrocks_br/labels.py +52 -0
- starrocks_br/logger.py +36 -0
- starrocks_br/planner.py +280 -0
- starrocks_br/repository.py +36 -0
- starrocks_br/restore.py +493 -0
- starrocks_br/schema.py +144 -0
- starrocks_br-0.1.0.dist-info/METADATA +12 -0
- starrocks_br-0.1.0.dist-info/RECORD +19 -0
- starrocks_br-0.1.0.dist-info/WHEEL +5 -0
- starrocks_br-0.1.0.dist-info/entry_points.txt +2 -0
- starrocks_br-0.1.0.dist-info/top_level.txt +1 -0
starrocks_br/planner.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
from typing import List, Dict, Optional
|
|
2
|
+
|
|
3
|
+
from starrocks_br import logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
    """Find the latest successful full backup for a database.

    Queries ``ops.backup_history`` for the most recently finished row whose
    ``backup_type`` is ``'full'``, whose ``status`` is ``'FINISHED'`` and whose
    label matches the pattern ``<database>_%``.

    Args:
        db: Database connection; only its ``query(sql)`` method is used.
        database: Database name to search for.

    Returns:
        Dictionary with keys: label, backup_type, finished_at, or None if no
        full backup found.
    """
    # The name ends up inside a single-quoted SQL literal, so backslashes and
    # quotes are escaped to keep an unusual value from terminating the literal.
    safe_database = database.replace("\\", "\\\\").replace("'", "''")

    # NOTE(review): '_' is a single-character wildcard in LIKE, so this prefix
    # match can also accept labels that merely resemble "<database>_..." —
    # confirm the label naming scheme rules that out.
    query = (
        "\n"
        "SELECT label, backup_type, finished_at\n"
        "FROM ops.backup_history\n"
        "WHERE backup_type = 'full'\n"
        "AND status = 'FINISHED'\n"
        f"AND label LIKE '{safe_database}_%'\n"
        "ORDER BY finished_at DESC\n"
        "LIMIT 1\n"
    )

    rows = db.query(query)
    if not rows:
        return None

    latest = rows[0]
    return {
        "label": latest[0],
        "backup_type": latest[1],
        "finished_at": latest[2],
    }
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def find_tables_by_group(db, group_name: str) -> List[Dict[str, str]]:
    """Find tables belonging to a specific inventory group.

    Reads ``ops.table_inventory`` rows whose ``inventory_group`` equals
    ``group_name``, ordered by database name and then table name.

    Args:
        db: Database connection; only its ``query(sql)`` method is used.
        group_name: Inventory group to look up.

    Returns:
        List of dictionaries with keys: database, table. A table value of
        ``'*'`` is returned unchanged; callers treat it as a wildcard meaning
        all tables in that database.
    """
    # Escape backslashes and quotes so the group name stays inside its literal.
    safe_group = group_name.replace("\\", "\\\\").replace("'", "''")
    query = (
        "\n"
        "SELECT database_name, table_name\n"
        "FROM ops.table_inventory\n"
        f"WHERE inventory_group = '{safe_group}'\n"
        "ORDER BY database_name, table_name\n"
    )
    # Treat a falsy result (e.g. None) the same as "no rows".
    rows = db.query(query) or []
    return [{"database": row[0], "table": row[1]} for row in rows]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def find_recent_partitions(db, database: str, baseline_backup_label: Optional[str] = None, *, group_name: str) -> List[Dict[str, str]]:
    """Find partitions updated since baseline for tables in the given inventory group.

    The baseline is the ``finished_at`` time of ``baseline_backup_label`` when
    one is given, otherwise that of the latest successful full backup of
    ``database``. A partition is reported when the VisibleVersionTime column
    of ``SHOW PARTITIONS`` compares greater than that baseline.

    Args:
        db: Database connection; only its ``query(sql)`` method is used.
        database: Database name (StarRocks database scope for backup).
        baseline_backup_label: Optional specific backup label to use as baseline.
        group_name: Inventory group whose tables will be considered.

    Returns:
        List of dictionaries with keys: database, table, partition_name.
        Only partitions of tables within the specified database are returned.

    Raises:
        ValueError: If the requested baseline backup is missing or not
            finished, or if no successful full backup exists for ``database``.
    """
    # Resolve the baseline first so a missing backup is reported before any
    # inventory or partition queries are issued.
    threshold_str = _baseline_threshold(db, database, baseline_backup_label)

    db_group_tables = [
        entry for entry in find_tables_by_group(db, group_name)
        if entry['database'] == database
    ]
    if not db_group_tables:
        return []

    recent_partitions = []
    for table_entry in _expand_group_tables(db, db_group_tables):
        db_name = table_entry['database']
        table_name = table_entry['table']

        try:
            partition_rows = db.query(f"SHOW PARTITIONS FROM {db_name}.{table_name}")
        except Exception as e:
            # Best effort: a table that cannot be inspected is logged and
            # skipped so the remaining tables are still processed.
            logger.error(f"Error showing partitions for table {db_name}.{table_name}: {e}")
            continue

        for row in partition_rows:
            # FOR SHARED NOTHING CLUSTER:
            # PartitionId, PartitionName, VisibleVersion, VisibleVersionTime, VisibleVersionHash, State, PartitionKey, Range, DistributionKey, Buckets, ReplicationNum, StorageMedium, CooldownTime, LastConsistencyCheckTime, DataSize, StorageSize, IsInMemory, RowCount, DataVersion, VersionEpoch, VersionTxnType
            partition_name = row[1]
            version_time_str = _format_timestamp(row[3])

            # Plain string comparison; assumes both values use the same
            # 'YYYY-MM-DD HH:MM:SS' layout — TODO confirm for string values
            # returned directly by the driver.
            if version_time_str > threshold_str:
                recent_partitions.append({
                    'database': db_name,
                    'table': table_name,
                    'partition_name': partition_name,
                })

    return recent_partitions


def _format_timestamp(value) -> str:
    """Return ``value`` unchanged if it is a string, else format it as 'YYYY-MM-DD HH:MM:SS'."""
    if isinstance(value, str):
        return value
    return value.strftime("%Y-%m-%d %H:%M:%S")


def _baseline_threshold(db, database: str, baseline_backup_label: Optional[str]) -> str:
    """Return the baseline ``finished_at`` time as a string, raising ValueError if none exists."""
    if baseline_backup_label:
        # Escape backslashes and quotes so the label stays inside its literal.
        safe_label = baseline_backup_label.replace("\\", "\\\\").replace("'", "''")
        baseline_query = (
            "\n"
            "SELECT finished_at\n"
            "FROM ops.backup_history\n"
            f"WHERE label = '{safe_label}'\n"
            "AND status = 'FINISHED'\n"
        )
        baseline_rows = db.query(baseline_query)
        if not baseline_rows:
            raise ValueError(f"Baseline backup '{baseline_backup_label}' not found or not successful")
        baseline_time = baseline_rows[0][0]
    else:
        latest_backup = find_latest_full_backup(db, database)
        if not latest_backup:
            raise ValueError(f"No successful full backup found for database '{database}'. Run a full database backup first.")
        baseline_time = latest_backup['finished_at']

    return _format_timestamp(baseline_time)


def _expand_group_tables(db, entries: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Replace '*' entries with the tables listed by SHOW TABLES, dropping repeated tables."""
    expanded = []
    seen = set()
    for entry in entries:
        entry_db = entry['database']
        if entry['table'] == '*':
            table_names = [row[0] for row in db.query(f"SHOW TABLES FROM {entry_db}")]
        else:
            table_names = [entry['table']]

        for table_name in table_names:
            # A group may list both '*' and an explicit table; without this
            # check that table's partitions would be reported twice.
            key = (entry_db, table_name)
            if key in seen:
                continue
            seen.add(key)
            expanded.append({'database': entry_db, 'table': table_name})
    return expanded
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def build_incremental_backup_command(partitions: List[Dict[str, str]], repository: str, label: str, database: str) -> str:
    """Build BACKUP command for incremental backup of specific partitions.

    Args:
        partitions: List of partitions to backup; each item has the keys
            database, table and partition_name.
        repository: Repository name.
        label: Backup label.
        database: Database name (StarRocks requires BACKUP to be database-specific).

    Returns:
        The BACKUP statement with one ``TABLE ... PARTITION (...)`` entry per
        table, or an empty string when no partition belongs to ``database``.

    Note: Filters partitions to only include those from the specified database.
    """
    # table name -> ordered set of partition names (dict keys keep insertion
    # order), so a partition listed more than once appears once in the command.
    table_partitions: Dict[str, Dict[str, None]] = {}
    for partition in partitions or []:
        if partition['database'] != database:
            continue
        table_partitions.setdefault(partition['table'], {})[partition['partition_name']] = None

    if not table_partitions:
        return ""

    on_clause = ",\n ".join(
        f"TABLE {table} PARTITION ({', '.join(names)})"
        for table, names in table_partitions.items()
    )

    return (
        f"BACKUP DATABASE {database} SNAPSHOT {label}\n"
        f"TO {repository}\n"
        f"ON ({on_clause})"
    )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def build_full_backup_command(db, group_name: str, repository: str, label: str, database: str) -> str:
    """Build the BACKUP statement for an inventory group within one database.

    The group's tables are looked up with ``find_tables_by_group`` and limited
    to entries for ``database``. If any of those entries is the ``'*'``
    wildcard, a plain ``BACKUP DATABASE`` statement is produced; otherwise the
    statement carries an ``ON (TABLE ...)`` list naming each table.

    Returns an empty string when the group has no entry for ``database``.
    """
    table_names = [
        entry['table']
        for entry in find_tables_by_group(db, group_name)
        if entry['database'] == database
    ]
    if not table_names:
        return ""

    # Shared prefix of both statement forms.
    statement = f"BACKUP DATABASE {database} SNAPSHOT {label}\nTO {repository}"

    # A wildcard entry means the whole database, so no ON clause is added.
    if '*' in table_names:
        return statement

    on_clause = ",\n ".join(f"TABLE {name}" for name in table_names)
    return f"{statement}\nON ({on_clause})"
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -> None:
    """Record partition metadata for a backup in ops.backup_partitions table.

    Issues one INSERT statement per partition through ``db.execute``. Nothing
    is executed when ``partitions`` is empty.

    Args:
        db: Database connection; only its ``execute(sql)`` method is used.
        label: Backup label.
        partitions: List of partitions with keys: database, table, partition_name.
    """
    if not partitions:
        return

    def quote(value) -> str:
        """Render ``value`` as a single-quoted SQL string literal."""
        # Escape backslashes and quotes so the value cannot end the literal.
        escaped = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"'{escaped}'"

    label_literal = quote(label)  # identical for every row
    for partition in partitions:
        values = ", ".join((
            label_literal,
            quote(partition['database']),
            quote(partition['table']),
            quote(partition['partition_name']),
        ))
        db.execute(
            "\n"
            "INSERT INTO ops.backup_partitions\n"
            "(label, database_name, table_name, partition_name)\n"
            f"VALUES ({values})\n"
        )
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def get_all_partitions_for_tables(db, database: str, tables: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Get all existing partitions for the specified tables.

    Queries ``information_schema.partitions_meta`` for rows of ``database``
    that have a non-NULL partition name. Entries of ``tables`` that belong to
    another database are ignored. If any remaining entry is the ``'*'``
    wildcard, partitions of every table in the database are returned;
    otherwise only those of the named tables.

    Args:
        db: Database connection; only its ``query(sql)`` method is used.
        database: Database name.
        tables: List of tables with keys: database, table.

    Returns:
        List of partitions with keys: database, table, partition_name.
        Empty when ``tables`` has no entry for ``database``.
    """
    if not tables:
        return []

    table_names = [t['table'] for t in tables if t['database'] == database]
    if not table_names:
        return []

    def quote(value) -> str:
        """Render ``value`` as a single-quoted SQL string literal."""
        # Escape backslashes and quotes so the value cannot end the literal.
        escaped = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"'{escaped}'"

    where_conditions = [f"DB_NAME = {quote(database)}", "PARTITION_NAME IS NOT NULL"]

    # A wildcard covers every table, so explicit names listed next to it must
    # not narrow the query; the table filter is only added without a wildcard.
    if '*' not in table_names:
        table_conditions = [f"TABLE_NAME = {quote(name)}" for name in table_names]
        where_conditions.append("(" + " OR ".join(table_conditions) + ")")

    where_clause = " AND ".join(where_conditions)

    query = (
        "\n"
        "SELECT DB_NAME, TABLE_NAME, PARTITION_NAME\n"
        "FROM information_schema.partitions_meta\n"
        f"WHERE {where_clause}\n"
        "ORDER BY TABLE_NAME, PARTITION_NAME\n"
    )

    rows = db.query(query)

    return [
        {
            "database": row[0],
            "table": row[1],
            "partition_name": row[2],
        }
        for row in rows
    ]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def ensure_repository(db, name: str) -> None:
    """Check that repository ``name`` is listed and reports no error.

    Args:
        db: Database connection, passed through to ``_find_repository``.
        name: Repository name to verify.

    Raises:
        RuntimeError: If no repository with that name is found, or if its
            ErrMsg column holds anything other than an empty/NULL/NONE value.
    """
    repo_row = _find_repository(db, name)
    if not repo_row:
        not_found = (
            f"Repository '{name}' not found. Please create it first using:\n"
            f" CREATE REPOSITORY {name} WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
            "For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
        )
        raise RuntimeError(not_found)

    # SHOW REPOSITORIES returns: RepoId, RepoName, CreateTime, IsReadOnly, Location, Broker, ErrMsg
    err_msg = repo_row[6]
    if not err_msg:
        return

    # Textual placeholders for "no error" are accepted as healthy too.
    if str(err_msg).strip().upper() in {"", "NULL", "NONE"}:
        return

    raise RuntimeError(f"Repository '{name}' has errors: {err_msg}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _find_repository(db, name: str):
|
|
29
|
+
"""Find a repository by name in SHOW REPOSITORIES output."""
|
|
30
|
+
rows = db.query("SHOW REPOSITORIES")
|
|
31
|
+
for row in rows:
|
|
32
|
+
if row and row[1] == name:
|
|
33
|
+
return row
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
|