starrocks-br 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starrocks_br/cli.py +257 -193
- starrocks_br/concurrency.py +50 -50
- starrocks_br/config.py +31 -23
- starrocks_br/db.py +37 -37
- starrocks_br/executor.py +100 -71
- starrocks_br/health.py +1 -6
- starrocks_br/history.py +5 -6
- starrocks_br/labels.py +14 -10
- starrocks_br/planner.py +119 -113
- starrocks_br/repository.py +3 -5
- starrocks_br/restore.py +240 -187
- starrocks_br/schema.py +20 -16
- starrocks_br/timezone.py +28 -29
- starrocks_br/utils.py +86 -0
- starrocks_br-0.4.0.dist-info/METADATA +152 -0
- starrocks_br-0.4.0.dist-info/RECORD +21 -0
- starrocks_br-0.2.0.dist-info/METADATA +0 -12
- starrocks_br-0.2.0.dist-info/RECORD +0 -20
- {starrocks_br-0.2.0.dist-info → starrocks_br-0.4.0.dist-info}/WHEEL +0 -0
- {starrocks_br-0.2.0.dist-info → starrocks_br-0.4.0.dist-info}/entry_points.txt +0 -0
- {starrocks_br-0.2.0.dist-info → starrocks_br-0.4.0.dist-info}/top_level.txt +0 -0
starrocks_br/executor.py
CHANGED
|
@@ -1,14 +1,30 @@
|
|
|
1
|
-
import time
|
|
2
|
-
import datetime
|
|
3
1
|
import re
|
|
4
|
-
|
|
5
|
-
from
|
|
2
|
+
import time
|
|
3
|
+
from typing import Literal, Optional
|
|
4
|
+
|
|
5
|
+
from . import concurrency, history, logger, timezone
|
|
6
|
+
|
|
7
|
+
MAX_POLLS = 86400 # 1 day
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _calculate_next_interval(current_interval: float, max_interval: float) -> float:
|
|
11
|
+
"""Calculate the next polling interval using exponential backoff.
|
|
12
|
+
|
|
13
|
+
Args:
|
|
14
|
+
current_interval: Current polling interval in seconds
|
|
15
|
+
max_interval: Maximum allowed interval in seconds
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
Next interval (min of doubled current interval and max_interval)
|
|
19
|
+
"""
|
|
20
|
+
return min(current_interval * 2, max_interval)
|
|
6
21
|
|
|
7
|
-
MAX_POLLS = 86400 # 1 day
|
|
8
22
|
|
|
9
|
-
def submit_backup_command(
|
|
23
|
+
def submit_backup_command(
|
|
24
|
+
db, backup_command: str
|
|
25
|
+
) -> tuple[bool, Optional[str], Optional[dict[str, str]]]:
|
|
10
26
|
"""Submit a backup command to StarRocks.
|
|
11
|
-
|
|
27
|
+
|
|
12
28
|
Returns (success, error_message, error_details).
|
|
13
29
|
error_details is a dict with keys like 'error_type' and 'snapshot_name' for specific error cases.
|
|
14
30
|
"""
|
|
@@ -18,19 +34,16 @@ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str],
|
|
|
18
34
|
except Exception as e:
|
|
19
35
|
error_str = str(e)
|
|
20
36
|
error_type = type(e).__name__
|
|
21
|
-
|
|
37
|
+
|
|
22
38
|
snapshot_exists_match = _check_snapshot_exists_error(e, error_str)
|
|
23
39
|
if snapshot_exists_match:
|
|
24
40
|
snapshot_name = snapshot_exists_match
|
|
25
|
-
error_details = {
|
|
26
|
-
'error_type': 'snapshot_exists',
|
|
27
|
-
'snapshot_name': snapshot_name
|
|
28
|
-
}
|
|
41
|
+
error_details = {"error_type": "snapshot_exists", "snapshot_name": snapshot_name}
|
|
29
42
|
error_msg = f"Snapshot '{snapshot_name}' already exists in repository"
|
|
30
43
|
logger.error(error_msg)
|
|
31
44
|
logger.error(f"backup_command: {backup_command}")
|
|
32
45
|
return False, error_msg, error_details
|
|
33
|
-
|
|
46
|
+
|
|
34
47
|
error_msg = f"Failed to submit backup command: {error_type}: {error_str}"
|
|
35
48
|
logger.error(error_msg)
|
|
36
49
|
logger.error(f"backup_command: {backup_command}")
|
|
@@ -39,48 +52,56 @@ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str],
|
|
|
39
52
|
|
|
40
53
|
def _check_snapshot_exists_error(exception: Exception, error_str: str) -> Optional[str]:
|
|
41
54
|
"""Check if the error is a 'snapshot already exists' error and extract snapshot name.
|
|
42
|
-
|
|
55
|
+
|
|
43
56
|
Args:
|
|
44
57
|
exception: The exception that was raised
|
|
45
58
|
error_str: String representation of the error
|
|
46
|
-
|
|
59
|
+
|
|
47
60
|
Returns:
|
|
48
61
|
Snapshot name if this is a snapshot exists error, None otherwise
|
|
49
62
|
"""
|
|
50
63
|
snapshot_name_pattern = r"Snapshot with name '([^']+)' already exist"
|
|
51
64
|
error_lower = error_str.lower()
|
|
52
|
-
|
|
65
|
+
|
|
53
66
|
is_snapshot_exists_error = (
|
|
54
|
-
"already exist" in error_lower
|
|
55
|
-
"already exists" in error_lower
|
|
56
|
-
("5064" in error_str and "already exist" in error_lower)
|
|
57
|
-
(hasattr(exception,
|
|
67
|
+
"already exist" in error_lower
|
|
68
|
+
or "already exists" in error_lower
|
|
69
|
+
or ("5064" in error_str and "already exist" in error_lower)
|
|
70
|
+
or (hasattr(exception, "errno") and exception.errno == 5064)
|
|
58
71
|
)
|
|
59
|
-
|
|
72
|
+
|
|
60
73
|
if is_snapshot_exists_error:
|
|
61
74
|
match = re.search(snapshot_name_pattern, error_str, re.IGNORECASE)
|
|
62
75
|
if match:
|
|
63
76
|
return match.group(1)
|
|
64
|
-
|
|
77
|
+
|
|
65
78
|
return None
|
|
66
79
|
|
|
67
80
|
|
|
68
|
-
def poll_backup_status(
|
|
81
|
+
def poll_backup_status(
|
|
82
|
+
db,
|
|
83
|
+
label: str,
|
|
84
|
+
database: str,
|
|
85
|
+
max_polls: int = MAX_POLLS,
|
|
86
|
+
poll_interval: float = 1.0,
|
|
87
|
+
max_poll_interval: float = 60.0,
|
|
88
|
+
) -> dict[str, str]:
|
|
69
89
|
"""Poll backup status until completion or timeout.
|
|
70
|
-
|
|
90
|
+
|
|
71
91
|
Note: SHOW BACKUP only returns the LAST backup in a database.
|
|
72
92
|
We verify that the SnapshotName matches our expected label.
|
|
73
|
-
|
|
93
|
+
|
|
74
94
|
Important: If we see a different snapshot name, it means another backup
|
|
75
95
|
operation overwrote ours and we've lost tracking (race condition).
|
|
76
|
-
|
|
96
|
+
|
|
77
97
|
Args:
|
|
78
98
|
db: Database connection
|
|
79
99
|
label: Expected snapshot name (label) to monitor
|
|
80
100
|
database: Database name where backup was submitted
|
|
81
101
|
max_polls: Maximum number of polling attempts
|
|
82
|
-
poll_interval:
|
|
83
|
-
|
|
102
|
+
poll_interval: Initial seconds to wait between polls (exponentially increases)
|
|
103
|
+
max_poll_interval: Maximum interval between polls (default 60 seconds)
|
|
104
|
+
|
|
84
105
|
Returns dictionary with keys: state, label
|
|
85
106
|
Possible states: FINISHED, CANCELLED, TIMEOUT, ERROR, LOST
|
|
86
107
|
"""
|
|
@@ -88,47 +109,51 @@ def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS
|
|
|
88
109
|
first_poll = True
|
|
89
110
|
last_state = None
|
|
90
111
|
poll_count = 0
|
|
91
|
-
|
|
112
|
+
current_interval = poll_interval
|
|
113
|
+
|
|
92
114
|
for _ in range(max_polls):
|
|
93
115
|
poll_count += 1
|
|
94
116
|
try:
|
|
95
117
|
rows = db.query(query)
|
|
96
|
-
|
|
118
|
+
|
|
97
119
|
if not rows:
|
|
98
|
-
time.sleep(
|
|
120
|
+
time.sleep(current_interval)
|
|
121
|
+
current_interval = _calculate_next_interval(current_interval, max_poll_interval)
|
|
99
122
|
continue
|
|
100
|
-
|
|
123
|
+
|
|
101
124
|
result = rows[0]
|
|
102
|
-
|
|
125
|
+
|
|
103
126
|
if isinstance(result, dict):
|
|
104
127
|
snapshot_name = result.get("SnapshotName", "")
|
|
105
128
|
state = result.get("State", "UNKNOWN")
|
|
106
129
|
else:
|
|
107
130
|
snapshot_name = result[1] if len(result) > 1 else ""
|
|
108
131
|
state = result[3] if len(result) > 3 else "UNKNOWN"
|
|
109
|
-
|
|
132
|
+
|
|
110
133
|
if snapshot_name != label:
|
|
111
134
|
if first_poll:
|
|
112
135
|
first_poll = False
|
|
113
|
-
time.sleep(
|
|
136
|
+
time.sleep(current_interval)
|
|
137
|
+
current_interval = _calculate_next_interval(current_interval, max_poll_interval)
|
|
114
138
|
continue
|
|
115
139
|
else:
|
|
116
140
|
return {"state": "LOST", "label": label}
|
|
117
|
-
|
|
141
|
+
|
|
118
142
|
first_poll = False
|
|
119
|
-
|
|
143
|
+
|
|
120
144
|
if state != last_state or poll_count % 10 == 0:
|
|
121
145
|
logger.progress(f"Backup status: {state} (poll {poll_count}/{max_polls})")
|
|
122
146
|
last_state = state
|
|
123
|
-
|
|
147
|
+
|
|
124
148
|
if state in ["FINISHED", "CANCELLED"]:
|
|
125
149
|
return {"state": state, "label": label}
|
|
126
|
-
|
|
127
|
-
time.sleep(
|
|
128
|
-
|
|
150
|
+
|
|
151
|
+
time.sleep(current_interval)
|
|
152
|
+
current_interval = _calculate_next_interval(current_interval, max_poll_interval)
|
|
153
|
+
|
|
129
154
|
except Exception:
|
|
130
155
|
return {"state": "ERROR", "label": label}
|
|
131
|
-
|
|
156
|
+
|
|
132
157
|
return {"state": "TIMEOUT", "label": label}
|
|
133
158
|
|
|
134
159
|
|
|
@@ -139,12 +164,12 @@ def execute_backup(
|
|
|
139
164
|
poll_interval: float = 1.0,
|
|
140
165
|
*,
|
|
141
166
|
repository: str,
|
|
142
|
-
backup_type: Literal[
|
|
167
|
+
backup_type: Literal["incremental", "full"] = None,
|
|
143
168
|
scope: str = "backup",
|
|
144
169
|
database: Optional[str] = None,
|
|
145
|
-
) ->
|
|
170
|
+
) -> dict:
|
|
146
171
|
"""Execute a complete backup workflow: submit command and monitor progress.
|
|
147
|
-
|
|
172
|
+
|
|
148
173
|
Args:
|
|
149
174
|
db: Database connection
|
|
150
175
|
backup_command: Backup SQL command to execute
|
|
@@ -154,31 +179,31 @@ def execute_backup(
|
|
|
154
179
|
backup_type: Type of backup (for logging)
|
|
155
180
|
scope: Job scope (for concurrency control)
|
|
156
181
|
database: Database name (required for SHOW BACKUP)
|
|
157
|
-
|
|
182
|
+
|
|
158
183
|
Returns dictionary with keys: success, final_status, error_message
|
|
159
184
|
"""
|
|
160
185
|
label = _extract_label_from_command(backup_command)
|
|
161
|
-
|
|
186
|
+
|
|
162
187
|
if not database:
|
|
163
188
|
database = _extract_database_from_command(backup_command)
|
|
164
189
|
|
|
165
190
|
cluster_tz = db.timezone
|
|
166
191
|
started_at = timezone.get_current_time_in_cluster_tz(cluster_tz)
|
|
167
|
-
|
|
192
|
+
|
|
168
193
|
success, submit_error, error_details = submit_backup_command(db, backup_command)
|
|
169
194
|
if not success:
|
|
170
195
|
result = {
|
|
171
196
|
"success": False,
|
|
172
197
|
"final_status": None,
|
|
173
|
-
"error_message": submit_error or "Failed to submit backup command (unknown error)"
|
|
198
|
+
"error_message": submit_error or "Failed to submit backup command (unknown error)",
|
|
174
199
|
}
|
|
175
200
|
if error_details:
|
|
176
201
|
result["error_details"] = error_details
|
|
177
202
|
return result
|
|
178
|
-
|
|
203
|
+
|
|
179
204
|
try:
|
|
180
205
|
final_status = poll_backup_status(db, label, database, max_polls, poll_interval)
|
|
181
|
-
|
|
206
|
+
|
|
182
207
|
success = final_status["state"] == "FINISHED"
|
|
183
208
|
|
|
184
209
|
try:
|
|
@@ -199,30 +224,34 @@ def execute_backup(
|
|
|
199
224
|
pass
|
|
200
225
|
|
|
201
226
|
try:
|
|
202
|
-
concurrency.complete_job_slot(
|
|
227
|
+
concurrency.complete_job_slot(
|
|
228
|
+
db, scope=scope, label=label, final_state=final_status["state"]
|
|
229
|
+
)
|
|
203
230
|
except Exception:
|
|
204
231
|
pass
|
|
205
|
-
|
|
232
|
+
|
|
206
233
|
return {
|
|
207
234
|
"success": success,
|
|
208
235
|
"final_status": final_status,
|
|
209
|
-
"error_message": None
|
|
236
|
+
"error_message": None
|
|
237
|
+
if success
|
|
238
|
+
else _build_error_message(final_status, label, database),
|
|
210
239
|
}
|
|
211
|
-
|
|
240
|
+
|
|
212
241
|
except Exception as e:
|
|
213
242
|
error_msg = f"Unexpected error during backup execution: {type(e).__name__}: {str(e)}"
|
|
214
243
|
logger.error(error_msg)
|
|
215
244
|
return {
|
|
216
245
|
"success": False,
|
|
217
246
|
"final_status": {"state": "ERROR", "label": label},
|
|
218
|
-
"error_message": error_msg
|
|
247
|
+
"error_message": error_msg,
|
|
219
248
|
}
|
|
220
249
|
|
|
221
250
|
|
|
222
|
-
def _build_error_message(final_status:
|
|
251
|
+
def _build_error_message(final_status: dict, label: str, database: str) -> str:
|
|
223
252
|
"""Build a descriptive error message based on backup final status."""
|
|
224
|
-
state = final_status.get(
|
|
225
|
-
|
|
253
|
+
state = final_status.get("state", "UNKNOWN")
|
|
254
|
+
|
|
226
255
|
if state == "LOST":
|
|
227
256
|
return (
|
|
228
257
|
f"Backup tracking lost for '{label}' in database '{database}'. "
|
|
@@ -254,42 +283,42 @@ def _build_error_message(final_status: Dict, label: str, database: str) -> str:
|
|
|
254
283
|
|
|
255
284
|
def _extract_label_from_command(backup_command: str) -> str:
|
|
256
285
|
"""Extract the snapshot label from a backup command.
|
|
257
|
-
|
|
286
|
+
|
|
258
287
|
This is a simple parser for StarRocks backup commands.
|
|
259
288
|
Handles both formats:
|
|
260
289
|
- BACKUP DATABASE db SNAPSHOT label TO repo
|
|
261
290
|
- BACKUP SNAPSHOT label TO repo (legacy)
|
|
262
291
|
"""
|
|
263
|
-
lines = backup_command.strip().split(
|
|
264
|
-
|
|
292
|
+
lines = backup_command.strip().split("\n")
|
|
293
|
+
|
|
265
294
|
for line in lines:
|
|
266
295
|
line = line.strip()
|
|
267
|
-
if line.startswith(
|
|
296
|
+
if line.startswith("BACKUP DATABASE"):
|
|
268
297
|
parts = line.split()
|
|
269
298
|
for i, part in enumerate(parts):
|
|
270
|
-
if part ==
|
|
299
|
+
if part == "SNAPSHOT" and i + 1 < len(parts):
|
|
271
300
|
return parts[i + 1]
|
|
272
|
-
elif line.startswith(
|
|
301
|
+
elif line.startswith("BACKUP SNAPSHOT"):
|
|
273
302
|
# Legacy syntax
|
|
274
303
|
parts = line.split()
|
|
275
304
|
if len(parts) >= 3:
|
|
276
305
|
return parts[2]
|
|
277
|
-
|
|
306
|
+
|
|
278
307
|
return "unknown_backup"
|
|
279
308
|
|
|
280
309
|
|
|
281
310
|
def _extract_database_from_command(backup_command: str) -> str:
|
|
282
311
|
"""Extract the database name from a backup command.
|
|
283
|
-
|
|
312
|
+
|
|
284
313
|
Parses: BACKUP DATABASE db_name SNAPSHOT label ...
|
|
285
314
|
"""
|
|
286
|
-
lines = backup_command.strip().split(
|
|
287
|
-
|
|
315
|
+
lines = backup_command.strip().split("\n")
|
|
316
|
+
|
|
288
317
|
for line in lines:
|
|
289
318
|
line = line.strip()
|
|
290
|
-
if line.startswith(
|
|
319
|
+
if line.startswith("BACKUP DATABASE"):
|
|
291
320
|
parts = line.split()
|
|
292
321
|
if len(parts) >= 3:
|
|
293
322
|
return parts[2]
|
|
294
|
-
|
|
323
|
+
|
|
295
324
|
return "unknown_database"
|
starrocks_br/health.py
CHANGED
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def check_cluster_health(db) -> Tuple[bool, str]:
|
|
1
|
+
def check_cluster_health(db) -> tuple[bool, str]:
|
|
5
2
|
"""Check FE/BE health via SHOW FRONTENDS/BACKENDS.
|
|
6
3
|
|
|
7
4
|
Returns (ok, message).
|
|
@@ -30,5 +27,3 @@ def check_cluster_health(db) -> Tuple[bool, str]:
|
|
|
30
27
|
if any_dead:
|
|
31
28
|
return False, "Cluster unhealthy: some FE/BE are DEAD or not READY"
|
|
32
29
|
return True, "Cluster healthy: all FE/BE are ALIVE and READY"
|
|
33
|
-
|
|
34
|
-
|
starrocks_br/history.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
2
3
|
from . import logger
|
|
3
4
|
|
|
4
5
|
|
|
5
|
-
def log_backup(db, entry:
|
|
6
|
+
def log_backup(db, entry: dict[str, Optional[str]]) -> None:
|
|
6
7
|
"""Write a backup history entry to ops.backup_history.
|
|
7
8
|
|
|
8
9
|
Expected keys in entry:
|
|
@@ -36,7 +37,7 @@ def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
|
|
|
36
37
|
{esc(started_at)}, {esc(finished_at)}, {esc(error_message)}
|
|
37
38
|
)
|
|
38
39
|
"""
|
|
39
|
-
|
|
40
|
+
|
|
40
41
|
try:
|
|
41
42
|
db.execute(sql)
|
|
42
43
|
except Exception as e:
|
|
@@ -44,7 +45,7 @@ def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
|
|
|
44
45
|
raise
|
|
45
46
|
|
|
46
47
|
|
|
47
|
-
def log_restore(db, entry:
|
|
48
|
+
def log_restore(db, entry: dict[str, Optional[str]]) -> None:
|
|
48
49
|
"""Write a restore history entry to ops.restore_history.
|
|
49
50
|
|
|
50
51
|
Expected keys in entry:
|
|
@@ -89,5 +90,3 @@ def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
|
|
|
89
90
|
except Exception as e:
|
|
90
91
|
logger.error(f"Failed to log restore history: {str(e)}")
|
|
91
92
|
raise
|
|
92
|
-
|
|
93
|
-
|
starrocks_br/labels.py
CHANGED
|
@@ -1,21 +1,26 @@
|
|
|
1
|
-
from typing import Optional, Literal
|
|
2
1
|
from datetime import datetime
|
|
2
|
+
from typing import Literal, Optional
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def determine_backup_label(
|
|
5
|
+
def determine_backup_label(
|
|
6
|
+
db,
|
|
7
|
+
backup_type: Literal["incremental", "full"],
|
|
8
|
+
database_name: str,
|
|
9
|
+
custom_name: Optional[str] = None,
|
|
10
|
+
) -> str:
|
|
6
11
|
"""Determine a unique backup label for the given parameters.
|
|
7
|
-
|
|
12
|
+
|
|
8
13
|
This is the single entry point for all backup label generation. It handles both
|
|
9
14
|
custom names and auto-generated date-based labels, ensuring uniqueness by checking
|
|
10
15
|
the ops.backup_history table.
|
|
11
|
-
|
|
16
|
+
|
|
12
17
|
Args:
|
|
13
18
|
db: Database connection
|
|
14
19
|
backup_type: Type of backup (incremental, full)
|
|
15
20
|
database_name: Name of the database being backed up
|
|
16
21
|
custom_name: Optional custom name for the backup. If provided, this becomes
|
|
17
22
|
the base label. If None, generates a date-based label.
|
|
18
|
-
|
|
23
|
+
|
|
19
24
|
Returns:
|
|
20
25
|
Unique label string that doesn't conflict with existing backups
|
|
21
26
|
"""
|
|
@@ -24,26 +29,25 @@ def determine_backup_label(db, backup_type: Literal['incremental', 'full'], data
|
|
|
24
29
|
else:
|
|
25
30
|
today = datetime.now().strftime("%Y%m%d")
|
|
26
31
|
base_label = f"{database_name}_{today}_{backup_type}"
|
|
27
|
-
|
|
32
|
+
|
|
28
33
|
query = """
|
|
29
34
|
SELECT label
|
|
30
35
|
FROM ops.backup_history
|
|
31
36
|
WHERE label LIKE %s
|
|
32
37
|
ORDER BY label
|
|
33
38
|
"""
|
|
34
|
-
|
|
39
|
+
|
|
35
40
|
pattern = f"{base_label}%"
|
|
36
|
-
|
|
41
|
+
|
|
37
42
|
try:
|
|
38
43
|
rows = db.query(query, (pattern,))
|
|
39
44
|
existing_labels = [row[0] for row in rows] if rows else []
|
|
40
45
|
except Exception:
|
|
41
46
|
existing_labels = []
|
|
42
47
|
|
|
43
|
-
|
|
44
48
|
if base_label not in existing_labels:
|
|
45
49
|
return base_label
|
|
46
|
-
|
|
50
|
+
|
|
47
51
|
retry_count = 1
|
|
48
52
|
while True:
|
|
49
53
|
candidate_label = f"{base_label}_r{retry_count}"
|