starrocks-br 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starrocks_br/executor.py CHANGED
@@ -1,14 +1,30 @@
1
- import time
2
- import datetime
3
1
  import re
4
- from typing import Dict, Literal, Optional, Tuple
5
- from . import history, concurrency, logger, timezone
2
+ import time
3
+ from typing import Literal, Optional
4
+
5
+ from . import concurrency, history, logger, timezone
6
+
7
+ MAX_POLLS = 86400 # 1 day
8
+
9
+
10
+ def _calculate_next_interval(current_interval: float, max_interval: float) -> float:
11
+ """Calculate the next polling interval using exponential backoff.
12
+
13
+ Args:
14
+ current_interval: Current polling interval in seconds
15
+ max_interval: Maximum allowed interval in seconds
16
+
17
+ Returns:
18
+ Next interval (min of doubled current interval and max_interval)
19
+ """
20
+ return min(current_interval * 2, max_interval)
6
21
 
7
- MAX_POLLS = 86400 # 1 day
8
22
 
9
- def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str], Optional[Dict[str, str]]]:
23
+ def submit_backup_command(
24
+ db, backup_command: str
25
+ ) -> tuple[bool, Optional[str], Optional[dict[str, str]]]:
10
26
  """Submit a backup command to StarRocks.
11
-
27
+
12
28
  Returns (success, error_message, error_details).
13
29
  error_details is a dict with keys like 'error_type' and 'snapshot_name' for specific error cases.
14
30
  """
@@ -18,19 +34,16 @@ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str],
18
34
  except Exception as e:
19
35
  error_str = str(e)
20
36
  error_type = type(e).__name__
21
-
37
+
22
38
  snapshot_exists_match = _check_snapshot_exists_error(e, error_str)
23
39
  if snapshot_exists_match:
24
40
  snapshot_name = snapshot_exists_match
25
- error_details = {
26
- 'error_type': 'snapshot_exists',
27
- 'snapshot_name': snapshot_name
28
- }
41
+ error_details = {"error_type": "snapshot_exists", "snapshot_name": snapshot_name}
29
42
  error_msg = f"Snapshot '{snapshot_name}' already exists in repository"
30
43
  logger.error(error_msg)
31
44
  logger.error(f"backup_command: {backup_command}")
32
45
  return False, error_msg, error_details
33
-
46
+
34
47
  error_msg = f"Failed to submit backup command: {error_type}: {error_str}"
35
48
  logger.error(error_msg)
36
49
  logger.error(f"backup_command: {backup_command}")
@@ -39,48 +52,56 @@ def submit_backup_command(db, backup_command: str) -> Tuple[bool, Optional[str],
39
52
 
40
53
  def _check_snapshot_exists_error(exception: Exception, error_str: str) -> Optional[str]:
41
54
  """Check if the error is a 'snapshot already exists' error and extract snapshot name.
42
-
55
+
43
56
  Args:
44
57
  exception: The exception that was raised
45
58
  error_str: String representation of the error
46
-
59
+
47
60
  Returns:
48
61
  Snapshot name if this is a snapshot exists error, None otherwise
49
62
  """
50
63
  snapshot_name_pattern = r"Snapshot with name '([^']+)' already exist"
51
64
  error_lower = error_str.lower()
52
-
65
+
53
66
  is_snapshot_exists_error = (
54
- "already exist" in error_lower or
55
- "already exists" in error_lower or
56
- ("5064" in error_str and "already exist" in error_lower) or
57
- (hasattr(exception, 'errno') and exception.errno == 5064)
67
+ "already exist" in error_lower
68
+ or "already exists" in error_lower
69
+ or ("5064" in error_str and "already exist" in error_lower)
70
+ or (hasattr(exception, "errno") and exception.errno == 5064)
58
71
  )
59
-
72
+
60
73
  if is_snapshot_exists_error:
61
74
  match = re.search(snapshot_name_pattern, error_str, re.IGNORECASE)
62
75
  if match:
63
76
  return match.group(1)
64
-
77
+
65
78
  return None
66
79
 
67
80
 
68
- def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS, poll_interval: float = 1.0) -> Dict[str, str]:
81
+ def poll_backup_status(
82
+ db,
83
+ label: str,
84
+ database: str,
85
+ max_polls: int = MAX_POLLS,
86
+ poll_interval: float = 1.0,
87
+ max_poll_interval: float = 60.0,
88
+ ) -> dict[str, str]:
69
89
  """Poll backup status until completion or timeout.
70
-
90
+
71
91
  Note: SHOW BACKUP only returns the LAST backup in a database.
72
92
  We verify that the SnapshotName matches our expected label.
73
-
93
+
74
94
  Important: If we see a different snapshot name, it means another backup
75
95
  operation overwrote ours and we've lost tracking (race condition).
76
-
96
+
77
97
  Args:
78
98
  db: Database connection
79
99
  label: Expected snapshot name (label) to monitor
80
100
  database: Database name where backup was submitted
81
101
  max_polls: Maximum number of polling attempts
82
- poll_interval: Seconds to wait between polls
83
-
102
+ poll_interval: Initial seconds to wait between polls (exponentially increases)
103
+ max_poll_interval: Maximum interval between polls (default 60 seconds)
104
+
84
105
  Returns dictionary with keys: state, label
85
106
  Possible states: FINISHED, CANCELLED, TIMEOUT, ERROR, LOST
86
107
  """
@@ -88,47 +109,51 @@ def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS
88
109
  first_poll = True
89
110
  last_state = None
90
111
  poll_count = 0
91
-
112
+ current_interval = poll_interval
113
+
92
114
  for _ in range(max_polls):
93
115
  poll_count += 1
94
116
  try:
95
117
  rows = db.query(query)
96
-
118
+
97
119
  if not rows:
98
- time.sleep(poll_interval)
120
+ time.sleep(current_interval)
121
+ current_interval = _calculate_next_interval(current_interval, max_poll_interval)
99
122
  continue
100
-
123
+
101
124
  result = rows[0]
102
-
125
+
103
126
  if isinstance(result, dict):
104
127
  snapshot_name = result.get("SnapshotName", "")
105
128
  state = result.get("State", "UNKNOWN")
106
129
  else:
107
130
  snapshot_name = result[1] if len(result) > 1 else ""
108
131
  state = result[3] if len(result) > 3 else "UNKNOWN"
109
-
132
+
110
133
  if snapshot_name != label:
111
134
  if first_poll:
112
135
  first_poll = False
113
- time.sleep(poll_interval)
136
+ time.sleep(current_interval)
137
+ current_interval = _calculate_next_interval(current_interval, max_poll_interval)
114
138
  continue
115
139
  else:
116
140
  return {"state": "LOST", "label": label}
117
-
141
+
118
142
  first_poll = False
119
-
143
+
120
144
  if state != last_state or poll_count % 10 == 0:
121
145
  logger.progress(f"Backup status: {state} (poll {poll_count}/{max_polls})")
122
146
  last_state = state
123
-
147
+
124
148
  if state in ["FINISHED", "CANCELLED"]:
125
149
  return {"state": state, "label": label}
126
-
127
- time.sleep(poll_interval)
128
-
150
+
151
+ time.sleep(current_interval)
152
+ current_interval = _calculate_next_interval(current_interval, max_poll_interval)
153
+
129
154
  except Exception:
130
155
  return {"state": "ERROR", "label": label}
131
-
156
+
132
157
  return {"state": "TIMEOUT", "label": label}
133
158
 
134
159
 
@@ -139,12 +164,12 @@ def execute_backup(
139
164
  poll_interval: float = 1.0,
140
165
  *,
141
166
  repository: str,
142
- backup_type: Literal['incremental', 'full'] = None,
167
+ backup_type: Literal["incremental", "full"] = None,
143
168
  scope: str = "backup",
144
169
  database: Optional[str] = None,
145
- ) -> Dict:
170
+ ) -> dict:
146
171
  """Execute a complete backup workflow: submit command and monitor progress.
147
-
172
+
148
173
  Args:
149
174
  db: Database connection
150
175
  backup_command: Backup SQL command to execute
@@ -154,31 +179,31 @@ def execute_backup(
154
179
  backup_type: Type of backup (for logging)
155
180
  scope: Job scope (for concurrency control)
156
181
  database: Database name (required for SHOW BACKUP)
157
-
182
+
158
183
  Returns dictionary with keys: success, final_status, error_message
159
184
  """
160
185
  label = _extract_label_from_command(backup_command)
161
-
186
+
162
187
  if not database:
163
188
  database = _extract_database_from_command(backup_command)
164
189
 
165
190
  cluster_tz = db.timezone
166
191
  started_at = timezone.get_current_time_in_cluster_tz(cluster_tz)
167
-
192
+
168
193
  success, submit_error, error_details = submit_backup_command(db, backup_command)
169
194
  if not success:
170
195
  result = {
171
196
  "success": False,
172
197
  "final_status": None,
173
- "error_message": submit_error or "Failed to submit backup command (unknown error)"
198
+ "error_message": submit_error or "Failed to submit backup command (unknown error)",
174
199
  }
175
200
  if error_details:
176
201
  result["error_details"] = error_details
177
202
  return result
178
-
203
+
179
204
  try:
180
205
  final_status = poll_backup_status(db, label, database, max_polls, poll_interval)
181
-
206
+
182
207
  success = final_status["state"] == "FINISHED"
183
208
 
184
209
  try:
@@ -199,30 +224,34 @@ def execute_backup(
199
224
  pass
200
225
 
201
226
  try:
202
- concurrency.complete_job_slot(db, scope=scope, label=label, final_state=final_status["state"])
227
+ concurrency.complete_job_slot(
228
+ db, scope=scope, label=label, final_state=final_status["state"]
229
+ )
203
230
  except Exception:
204
231
  pass
205
-
232
+
206
233
  return {
207
234
  "success": success,
208
235
  "final_status": final_status,
209
- "error_message": None if success else _build_error_message(final_status, label, database)
236
+ "error_message": None
237
+ if success
238
+ else _build_error_message(final_status, label, database),
210
239
  }
211
-
240
+
212
241
  except Exception as e:
213
242
  error_msg = f"Unexpected error during backup execution: {type(e).__name__}: {str(e)}"
214
243
  logger.error(error_msg)
215
244
  return {
216
245
  "success": False,
217
246
  "final_status": {"state": "ERROR", "label": label},
218
- "error_message": error_msg
247
+ "error_message": error_msg,
219
248
  }
220
249
 
221
250
 
222
- def _build_error_message(final_status: Dict, label: str, database: str) -> str:
251
+ def _build_error_message(final_status: dict, label: str, database: str) -> str:
223
252
  """Build a descriptive error message based on backup final status."""
224
- state = final_status.get('state', 'UNKNOWN')
225
-
253
+ state = final_status.get("state", "UNKNOWN")
254
+
226
255
  if state == "LOST":
227
256
  return (
228
257
  f"Backup tracking lost for '{label}' in database '{database}'. "
@@ -254,42 +283,42 @@ def _build_error_message(final_status: Dict, label: str, database: str) -> str:
254
283
 
255
284
  def _extract_label_from_command(backup_command: str) -> str:
256
285
  """Extract the snapshot label from a backup command.
257
-
286
+
258
287
  This is a simple parser for StarRocks backup commands.
259
288
  Handles both formats:
260
289
  - BACKUP DATABASE db SNAPSHOT label TO repo
261
290
  - BACKUP SNAPSHOT label TO repo (legacy)
262
291
  """
263
- lines = backup_command.strip().split('\n')
264
-
292
+ lines = backup_command.strip().split("\n")
293
+
265
294
  for line in lines:
266
295
  line = line.strip()
267
- if line.startswith('BACKUP DATABASE'):
296
+ if line.startswith("BACKUP DATABASE"):
268
297
  parts = line.split()
269
298
  for i, part in enumerate(parts):
270
- if part == 'SNAPSHOT' and i + 1 < len(parts):
299
+ if part == "SNAPSHOT" and i + 1 < len(parts):
271
300
  return parts[i + 1]
272
- elif line.startswith('BACKUP SNAPSHOT'):
301
+ elif line.startswith("BACKUP SNAPSHOT"):
273
302
  # Legacy syntax
274
303
  parts = line.split()
275
304
  if len(parts) >= 3:
276
305
  return parts[2]
277
-
306
+
278
307
  return "unknown_backup"
279
308
 
280
309
 
281
310
  def _extract_database_from_command(backup_command: str) -> str:
282
311
  """Extract the database name from a backup command.
283
-
312
+
284
313
  Parses: BACKUP DATABASE db_name SNAPSHOT label ...
285
314
  """
286
- lines = backup_command.strip().split('\n')
287
-
315
+ lines = backup_command.strip().split("\n")
316
+
288
317
  for line in lines:
289
318
  line = line.strip()
290
- if line.startswith('BACKUP DATABASE'):
319
+ if line.startswith("BACKUP DATABASE"):
291
320
  parts = line.split()
292
321
  if len(parts) >= 3:
293
322
  return parts[2]
294
-
323
+
295
324
  return "unknown_database"
starrocks_br/health.py CHANGED
@@ -1,7 +1,4 @@
1
- from typing import Tuple
2
-
3
-
4
- def check_cluster_health(db) -> Tuple[bool, str]:
1
+ def check_cluster_health(db) -> tuple[bool, str]:
5
2
  """Check FE/BE health via SHOW FRONTENDS/BACKENDS.
6
3
 
7
4
  Returns (ok, message).
@@ -30,5 +27,3 @@ def check_cluster_health(db) -> Tuple[bool, str]:
30
27
  if any_dead:
31
28
  return False, "Cluster unhealthy: some FE/BE are DEAD or not READY"
32
29
  return True, "Cluster healthy: all FE/BE are ALIVE and READY"
33
-
34
-
starrocks_br/history.py CHANGED
@@ -1,8 +1,9 @@
1
- from typing import Dict, Optional
1
+ from typing import Optional
2
+
2
3
  from . import logger
3
4
 
4
5
 
5
- def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
6
+ def log_backup(db, entry: dict[str, Optional[str]]) -> None:
6
7
  """Write a backup history entry to ops.backup_history.
7
8
 
8
9
  Expected keys in entry:
@@ -36,7 +37,7 @@ def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
36
37
  {esc(started_at)}, {esc(finished_at)}, {esc(error_message)}
37
38
  )
38
39
  """
39
-
40
+
40
41
  try:
41
42
  db.execute(sql)
42
43
  except Exception as e:
@@ -44,7 +45,7 @@ def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
44
45
  raise
45
46
 
46
47
 
47
- def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
48
+ def log_restore(db, entry: dict[str, Optional[str]]) -> None:
48
49
  """Write a restore history entry to ops.restore_history.
49
50
 
50
51
  Expected keys in entry:
@@ -89,5 +90,3 @@ def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
89
90
  except Exception as e:
90
91
  logger.error(f"Failed to log restore history: {str(e)}")
91
92
  raise
92
-
93
-
starrocks_br/labels.py CHANGED
@@ -1,21 +1,26 @@
1
- from typing import Optional, Literal
2
1
  from datetime import datetime
2
+ from typing import Literal, Optional
3
3
 
4
4
 
5
- def determine_backup_label(db, backup_type: Literal['incremental', 'full'], database_name: str, custom_name: Optional[str] = None) -> str:
5
+ def determine_backup_label(
6
+ db,
7
+ backup_type: Literal["incremental", "full"],
8
+ database_name: str,
9
+ custom_name: Optional[str] = None,
10
+ ) -> str:
6
11
  """Determine a unique backup label for the given parameters.
7
-
12
+
8
13
  This is the single entry point for all backup label generation. It handles both
9
14
  custom names and auto-generated date-based labels, ensuring uniqueness by checking
10
15
  the ops.backup_history table.
11
-
16
+
12
17
  Args:
13
18
  db: Database connection
14
19
  backup_type: Type of backup (incremental, full)
15
20
  database_name: Name of the database being backed up
16
21
  custom_name: Optional custom name for the backup. If provided, this becomes
17
22
  the base label. If None, generates a date-based label.
18
-
23
+
19
24
  Returns:
20
25
  Unique label string that doesn't conflict with existing backups
21
26
  """
@@ -24,26 +29,25 @@ def determine_backup_label(db, backup_type: Literal['incremental', 'full'], data
24
29
  else:
25
30
  today = datetime.now().strftime("%Y%m%d")
26
31
  base_label = f"{database_name}_{today}_{backup_type}"
27
-
32
+
28
33
  query = """
29
34
  SELECT label
30
35
  FROM ops.backup_history
31
36
  WHERE label LIKE %s
32
37
  ORDER BY label
33
38
  """
34
-
39
+
35
40
  pattern = f"{base_label}%"
36
-
41
+
37
42
  try:
38
43
  rows = db.query(query, (pattern,))
39
44
  existing_labels = [row[0] for row in rows] if rows else []
40
45
  except Exception:
41
46
  existing_labels = []
42
47
 
43
-
44
48
  if base_label not in existing_labels:
45
49
  return base_label
46
-
50
+
47
51
  retry_count = 1
48
52
  while True:
49
53
  candidate_label = f"{base_label}_r{retry_count}"