starrocks-br 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ import time
2
+ import datetime
3
+ from typing import Dict, Literal, Optional
4
+ from . import history, concurrency, logger
5
+
6
+ MAX_POLLS = 21600 # 6 hours
7
+
8
def submit_backup_command(db, backup_command: str) -> tuple[bool, Optional[str]]:
    """Send a backup statement to StarRocks for execution.

    The command is stripped of surrounding whitespace and handed to
    ``db.execute``. Any exception is logged (together with the original
    command text) and reported to the caller instead of being propagated.

    Returns:
        ``(True, None)`` when the statement was accepted, otherwise
        ``(False, error_message)``.
    """
    try:
        db.execute(backup_command.strip())
    except Exception as exc:
        failure = f"Failed to submit backup command: {type(exc).__name__}: {str(exc)}"
        logger.error(failure)
        logger.error(f"backup_command: {backup_command}")
        return False, failure
    else:
        return True, None
21
+
22
+
23
def poll_backup_status(db, label: str, database: str, max_polls: int = MAX_POLLS, poll_interval: float = 1.0) -> Dict[str, str]:
    """Poll ``SHOW BACKUP`` until the backup finishes, is cancelled, or polling gives up.

    Note: SHOW BACKUP only returns the LAST backup in a database, so every
    poll verifies that the reported SnapshotName equals ``label``.

    The first row that is inspected may carry a different snapshot name once
    (our job may not be visible yet). Any later mismatch means another backup
    operation replaced ours in SHOW BACKUP and tracking is reported as LOST.

    Rows may be dictionaries (keys ``SnapshotName`` / ``State``) or sequences
    (snapshot name at index 1, state at index 3).

    Args:
        db: Database connection
        label: Expected snapshot name (label) to monitor
        database: Database name where backup was submitted
        max_polls: Maximum number of polling attempts
        poll_interval: Seconds to wait between polls

    Returns:
        Dictionary with keys ``state`` and ``label``.
        Possible states: FINISHED, CANCELLED, TIMEOUT, ERROR, LOST
    """
    query = f"SHOW BACKUP FROM {database}"
    first_poll = True
    last_state = None

    for poll_count in range(1, max_polls + 1):
        try:
            rows = db.query(query)

            if not rows:
                # Nothing reported yet; wait and ask again.
                time.sleep(poll_interval)
                continue

            result = rows[0]

            if isinstance(result, dict):
                snapshot_name = result.get("SnapshotName", "")
                state = result.get("State", "UNKNOWN")
            else:
                snapshot_name = result[1] if len(result) > 1 else ""
                state = result[3] if len(result) > 3 else "UNKNOWN"

            if snapshot_name != label:
                if first_poll:
                    # One-time grace: the previous backup may still be the
                    # one shown right after submission.
                    first_poll = False
                    time.sleep(poll_interval)
                    continue
                return {"state": "LOST", "label": label}

            first_poll = False

            # Report every state change, plus a heartbeat every 10th poll.
            if state != last_state or poll_count % 10 == 0:
                logger.progress(f"Backup status: {state} (poll {poll_count}/{max_polls})")
                last_state = state

            if state in ("FINISHED", "CANCELLED"):
                return {"state": state, "label": label}

            time.sleep(poll_interval)

        except Exception as e:
            # Record the cause before giving up; the ERROR state alone does
            # not tell the operator what went wrong.
            logger.error(
                f"Error while polling backup status for '{label}': {type(e).__name__}: {str(e)}"
            )
            return {"state": "ERROR", "label": label}

    return {"state": "TIMEOUT", "label": label}
88
+
89
+
90
def execute_backup(
    db,
    backup_command: str,
    max_polls: int = MAX_POLLS,
    poll_interval: float = 1.0,
    *,
    repository: str,
    backup_type: Optional[Literal['incremental', 'full']] = None,
    scope: str = "backup",
    database: Optional[str] = None,
) -> Dict:
    """Execute a complete backup workflow: submit command and monitor progress.

    After polling ends, a history entry is written and the concurrency job
    slot is completed. Both steps are best-effort: a failure is logged as a
    warning and does not change the reported backup result.

    Args:
        db: Database connection
        backup_command: Backup SQL command to execute
        max_polls: Maximum polling attempts
        poll_interval: Seconds between polls
        repository: Repository name (recorded in the history entry)
        backup_type: Type of backup (recorded in the history entry)
        scope: Job scope passed to ``concurrency.complete_job_slot``
        database: Database name used for SHOW BACKUP; parsed from
            ``backup_command`` when not given

    Returns:
        Dictionary with keys: success, final_status, error_message
    """
    label = _extract_label_from_command(backup_command)

    if not database:
        database = _extract_database_from_command(backup_command)

    started_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    success, submit_error = submit_backup_command(db, backup_command)
    if not success:
        return {
            "success": False,
            "final_status": None,
            "error_message": submit_error or "Failed to submit backup command (unknown error)",
        }

    try:
        final_status = poll_backup_status(db, label, database, max_polls, poll_interval)

        success = final_status["state"] == "FINISHED"

        try:
            history.log_backup(
                db,
                {
                    "label": label,
                    "backup_type": backup_type,
                    "status": final_status["state"],
                    "repository": repository,
                    "started_at": started_at,
                    "finished_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "error_message": None if success else (final_status["state"] or ""),
                },
            )
        except Exception as e:
            # Best-effort bookkeeping: never fail the backup because of it.
            logger.warning(
                f"Backup history entry for '{label}' was not recorded: {type(e).__name__}: {str(e)}"
            )

        try:
            concurrency.complete_job_slot(db, scope=scope, label=label, final_state=final_status["state"])
        except Exception as e:
            # Best-effort as well, but leave a trace for the operator.
            logger.warning(
                f"Could not complete job slot for '{label}' (scope '{scope}'): {type(e).__name__}: {str(e)}"
            )

        return {
            "success": success,
            "final_status": final_status,
            # Pass the poll limit actually used so a TIMEOUT message is accurate.
            "error_message": None if success else _build_error_message(final_status, label, database, max_polls),
        }

    except Exception as e:
        error_msg = f"Unexpected error during backup execution: {type(e).__name__}: {str(e)}"
        logger.error(error_msg)
        return {
            "success": False,
            "final_status": {"state": "ERROR", "label": label},
            "error_message": error_msg,
        }


def _build_error_message(final_status: Dict, label: str, database: str, max_polls: Optional[int] = None) -> str:
    """Build a descriptive error message based on backup final status.

    Args:
        final_status: Status dictionary; its ``state`` key selects the message
        label: Snapshot label of the backup
        database: Database the backup was submitted for
        max_polls: Number of polls that were attempted, used in the TIMEOUT
            message; falls back to ``MAX_POLLS`` when omitted

    Returns:
        Human-readable explanation for the non-successful state.
    """
    state = final_status.get('state', 'UNKNOWN')

    if state == "LOST":
        return (
            f"Backup tracking lost for '{label}' in database '{database}'. "
            f"Another backup operation overwrote the last backup status visible in SHOW BACKUP. "
            f"This indicates a concurrency issue - only one backup per database should run at a time. "
            f"Recommendation: Use ops.run_status concurrency control to prevent simultaneous backups, "
            f"or verify if another tool/user is running backups on this database."
        )
    elif state == "CANCELLED":
        return (
            f"Backup '{label}' was cancelled by StarRocks. "
            f"Check StarRocks logs for the reason (common causes: insufficient resources, storage issues, or manual cancellation)."
        )
    elif state == "TIMEOUT":
        polls = MAX_POLLS if max_polls is None else max_polls
        return (
            f"Backup '{label}' monitoring timed out after {polls} polls. "
            f"The backup may still be running in the background. "
            f"Check SHOW BACKUP FROM {database} manually to see current status."
        )
    elif state == "ERROR":
        return (
            f"Error occurred while monitoring backup '{label}' status. "
            f"The backup may have been submitted but monitoring failed. "
            f"Check SHOW BACKUP FROM {database} and StarRocks logs for details."
        )
    else:
        return f"Backup '{label}' failed with unexpected state: {state}"
203
+
204
+
205
+ def _extract_label_from_command(backup_command: str) -> str:
206
+ """Extract the snapshot label from a backup command.
207
+
208
+ This is a simple parser for StarRocks backup commands.
209
+ Handles both formats:
210
+ - BACKUP DATABASE db SNAPSHOT label TO repo
211
+ - BACKUP SNAPSHOT label TO repo (legacy)
212
+ """
213
+ lines = backup_command.strip().split('\n')
214
+
215
+ for line in lines:
216
+ line = line.strip()
217
+ if line.startswith('BACKUP DATABASE'):
218
+ parts = line.split()
219
+ for i, part in enumerate(parts):
220
+ if part == 'SNAPSHOT' and i + 1 < len(parts):
221
+ return parts[i + 1]
222
+ elif line.startswith('BACKUP SNAPSHOT'):
223
+ # Legacy syntax
224
+ parts = line.split()
225
+ if len(parts) >= 3:
226
+ return parts[2]
227
+
228
+ return "unknown_backup"
229
+
230
+
231
+ def _extract_database_from_command(backup_command: str) -> str:
232
+ """Extract the database name from a backup command.
233
+
234
+ Parses: BACKUP DATABASE db_name SNAPSHOT label ...
235
+ """
236
+ lines = backup_command.strip().split('\n')
237
+
238
+ for line in lines:
239
+ line = line.strip()
240
+ if line.startswith('BACKUP DATABASE'):
241
+ parts = line.split()
242
+ if len(parts) >= 3:
243
+ return parts[2]
244
+
245
+ return "unknown_database"
starrocks_br/health.py ADDED
@@ -0,0 +1,34 @@
1
+ from typing import Tuple
2
+
3
+
4
def check_cluster_health(db) -> Tuple[bool, str]:
    """Report whether every frontend and backend node looks alive.

    Runs ``SHOW FRONTENDS`` and ``SHOW BACKENDS`` and inspects fixed column
    positions: 9 and 10 of each frontend row, 8 of each backend row. A value
    counts as alive when its upper-cased string form is one of ``ALIVE``,
    ``TRUE``, ``YES`` or ``1``; a row too short to contain the column is
    treated as alive. Backends are only inspected when all frontends passed.

    Returns:
        ``(ok, message)``.
    """
    frontends = db.query("SHOW FRONTENDS")
    backends = db.query("SHOW BACKENDS")

    alive_markers = {"ALIVE", "TRUE", "YES", "1"}

    def column_is_up(row, position: int) -> bool:
        # Missing columns are given the benefit of the doubt.
        if len(row) <= position:
            return True
        return str(row[position]).upper() in alive_markers

    unhealthy = any(
        not (column_is_up(fe, 9) and column_is_up(fe, 10)) for fe in frontends
    )
    if not unhealthy:
        unhealthy = any(not column_is_up(be, 8) for be in backends)

    if unhealthy:
        return False, "Cluster unhealthy: some FE/BE are DEAD or not READY"
    return True, "Cluster healthy: all FE/BE are ALIVE and READY"
33
+
34
+
@@ -0,0 +1,93 @@
1
+ from typing import Dict, Optional
2
+ from . import logger
3
+
4
+
5
def log_backup(db, entry: Dict[str, Optional[str]]) -> None:
    """Write a backup history entry to ops.backup_history.

    Expected keys in entry:
    - label
    - backup_type (incremental|full)
    - status (FINISHED|FAILED|CANCELLED)
    - repository
    - started_at (YYYY-MM-DD HH:MM:SS)
    - finished_at (YYYY-MM-DD HH:MM:SS)
    - error_message (nullable)

    Missing text fields are stored as empty strings; missing timestamps and
    ``None`` values are stored as SQL NULL.

    Raises:
        Exception: whatever ``db.execute`` raises, after it has been logged.
    """
    def esc(val: Optional[str]) -> str:
        """Render a value as a SQL string literal, or NULL for ``None``."""
        if val is None:
            return "NULL"
        # Escape backslashes first, then quotes, so neither can end the literal.
        text = str(val).replace("\\", "\\\\").replace("'", "''")
        return "'" + text + "'"

    label = entry.get("label", "")
    backup_type = entry.get("backup_type", "")
    status = entry.get("status", "")
    repository = entry.get("repository", "")
    # No default here: an absent timestamp must become SQL NULL, not the
    # quoted string 'NULL'.
    started_at = entry.get("started_at")
    finished_at = entry.get("finished_at")
    error_message = entry.get("error_message")

    sql = f"""
    INSERT INTO ops.backup_history (
        label, backup_type, status, repository, started_at, finished_at, error_message
    ) VALUES (
        {esc(label)}, {esc(backup_type)}, {esc(status)}, {esc(repository)},
        {esc(started_at)}, {esc(finished_at)}, {esc(error_message)}
    )
    """

    try:
        db.execute(sql)
    except Exception as e:
        logger.error(f"Failed to log backup history: {str(e)}")
        raise
45
+
46
+
47
def log_restore(db, entry: Dict[str, Optional[str]]) -> None:
    """Write a restore history entry to ops.restore_history.

    Expected keys in entry:
    - job_id
    - backup_label
    - restore_type (partition|table|database)
    - status (FINISHED|FAILED|CANCELLED)
    - repository
    - started_at (YYYY-MM-DD HH:MM:SS)
    - finished_at (YYYY-MM-DD HH:MM:SS)
    - error_message (nullable)
    - verification_checksum (optional)

    Missing text fields are stored as empty strings; missing timestamps and
    ``None`` values are stored as SQL NULL.

    Raises:
        Exception: whatever ``db.execute`` raises, after it has been logged.
    """
    def esc(val: Optional[str]) -> str:
        """Render a value as a SQL string literal, or NULL for ``None``."""
        if val is None:
            return "NULL"
        # Escape backslashes first, then quotes, so neither can end the literal.
        text = str(val).replace("\\", "\\\\").replace("'", "''")
        return "'" + text + "'"

    job_id = entry.get("job_id", "")
    backup_label = entry.get("backup_label", "")
    restore_type = entry.get("restore_type", "")
    status = entry.get("status", "")
    repository = entry.get("repository", "")
    # No default here: an absent timestamp must become SQL NULL, not the
    # quoted string 'NULL'.
    started_at = entry.get("started_at")
    finished_at = entry.get("finished_at")
    error_message = entry.get("error_message")
    verification_checksum = entry.get("verification_checksum")

    sql = f"""
    INSERT INTO ops.restore_history (
        job_id, backup_label, restore_type, status, repository,
        started_at, finished_at, error_message, verification_checksum
    ) VALUES (
        {esc(job_id)}, {esc(backup_label)}, {esc(restore_type)}, {esc(status)},
        {esc(repository)}, {esc(started_at)}, {esc(finished_at)},
        {esc(error_message)}, {esc(verification_checksum)}
    )
    """

    try:
        db.execute(sql)
    except Exception as e:
        logger.error(f"Failed to log restore history: {str(e)}")
        raise
92
+
93
+
starrocks_br/labels.py ADDED
@@ -0,0 +1,52 @@
1
+ from typing import Optional, Literal
2
+ from datetime import datetime
3
+
4
+
5
def determine_backup_label(db, backup_type: Literal['incremental', 'full'], database_name: str, custom_name: Optional[str] = None) -> str:
    """Pick a backup label that is not yet present in ops.backup_history.

    The base label is ``custom_name`` when one is given, otherwise
    ``<database_name>_<YYYYMMDD>_<backup_type>`` built from today's date.
    Existing labels starting with that base are fetched from
    ops.backup_history; if the lookup raises, no labels are considered taken.
    When the base label is already used, ``_r1``, ``_r2``, ... are appended
    until an unused label is found.

    Args:
        db: Database connection
        backup_type: Type of backup (incremental, full)
        database_name: Name of the database being backed up
        custom_name: Optional custom base label

    Returns:
        The first label in the sequence that is not already recorded.
    """
    if not custom_name:
        stamp = datetime.now().strftime("%Y%m%d")
        stem = f"{database_name}_{stamp}_{backup_type}"
    else:
        stem = custom_name

    lookup_sql = """
    SELECT label
    FROM ops.backup_history
    WHERE label LIKE %s
    ORDER BY label
    """

    prefix_pattern = f"{stem}%"

    try:
        found = db.query(lookup_sql, (prefix_pattern,))
        taken = [record[0] for record in found] if found else []
    except Exception:
        taken = []

    if stem not in taken:
        return stem

    # Walk the _rN suffixes until one is free.
    suffix = 1
    candidate = f"{stem}_r{suffix}"
    while candidate in taken:
        suffix += 1
        candidate = f"{stem}_r{suffix}"
    return candidate
starrocks_br/logger.py ADDED
@@ -0,0 +1,36 @@
1
+ import click
2
+
3
+
4
def info(message: str) -> None:
    """Emit a plain informational line without any prefix."""
    click.echo(message, err=False)
7
+
8
+
9
def success(message: str) -> None:
    """Emit a success line prefixed with a check mark."""
    line = f"✓ {message}"
    click.echo(line)
12
+
13
+
14
def warning(message: str) -> None:
    """Emit a warning line, prefixed with a warning sign, on the error stream."""
    line = f"⚠ {message}"
    click.echo(line, err=True)
17
+
18
+
19
def error(message: str) -> None:
    """Emit an error line, prefixed with ``Error:``, on the error stream."""
    line = f"Error: {message}"
    click.echo(line, err=True)
22
+
23
+
24
def critical(message: str) -> None:
    """Emit a critical-error line, prefixed with a cross mark, on the error stream."""
    line = f"❌ CRITICAL: {message}"
    click.echo(line, err=True)
27
+
28
+
29
def progress(message: str) -> None:
    """Emit a progress line prefixed with an hourglass."""
    line = f"⏳ {message}"
    click.echo(line)
32
+
33
+
34
def tip(message: str) -> None:
    """Emit a tip line, prefixed with a light bulb, on the error stream."""
    line = f"💡 {message}"
    click.echo(line, err=True)