starrocks-br 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ from typing import List, Dict, Optional
2
+
3
+ from starrocks_br import logger
4
+
5
+
6
def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
    """Find the latest successful full backup for a database.

    Backups are matched by the label prefix ``<database>_``. The database
    name is escaped before it is embedded in the query so that:

    * LIKE metacharacters in the prefix (``_`` and ``%``) are matched
      literally. Unescaped, the ``_`` separator matches *any* single
      character, so ``sales`` would also pick up ``sales2_...`` labels.
    * a quote or backslash in the name cannot terminate the string literal.

    NOTE(review): a label prefix alone still cannot tell ``sales`` apart from
    a database named ``sales_eu``; that would need a dedicated database
    column in ``ops.backup_history`` (not visible from here).

    Args:
        db: Database connection; only ``db.query(sql)`` is used and it must
            return a sequence of rows indexable by position.
        database: Database name to search for.

    Returns:
        Dictionary with keys: label, backup_type, finished_at, or None if no
        full backup is found.
    """
    # Layer 1: neutralise LIKE metacharacters so that only the trailing '%'
    # added in the query below acts as a wildcard.
    like_prefix = (
        f"{database}_"
        .replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    # Layer 2: escape for the single-quoted SQL string literal.
    like_literal = like_prefix.replace("\\", "\\\\").replace("'", "\\'")

    query = f"""
    SELECT label, backup_type, finished_at
    FROM ops.backup_history
    WHERE backup_type = 'full'
    AND status = 'FINISHED'
    AND label LIKE '{like_literal}%'
    ORDER BY finished_at DESC
    LIMIT 1
    """

    rows = db.query(query)
    if not rows:
        return None

    label, backup_type, finished_at = rows[0][0], rows[0][1], rows[0][2]
    return {
        "label": label,
        "backup_type": backup_type,
        "finished_at": finished_at,
    }
37
+
38
+
39
def find_tables_by_group(db, group_name: str) -> List[Dict[str, str]]:
    """Find tables belonging to a specific inventory group.

    The group name is escaped before being embedded in the query, so a quote
    or backslash in it cannot terminate the string literal. For names without
    those characters the generated SQL is unchanged.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.
        group_name: Value matched against ``ops.table_inventory.inventory_group``.

    Returns:
        List of dictionaries with keys: database, table, ordered by database
        then table name. A table value of '*' is a wildcard which signifies
        all tables in that database; it is returned as-is, not expanded.
    """
    group_literal = group_name.replace("\\", "\\\\").replace("'", "\\'")
    query = f"""
    SELECT database_name, table_name
    FROM ops.table_inventory
    WHERE inventory_group = '{group_literal}'
    ORDER BY database_name, table_name
    """
    return [{"database": row[0], "table": row[1]} for row in db.query(query)]
55
+
56
+
57
def find_recent_partitions(db, database: str, baseline_backup_label: Optional[str] = None, *, group_name: str) -> List[Dict[str, str]]:
    """Find partitions updated since baseline for tables in the given inventory group.

    The baseline is the ``finished_at`` time of ``baseline_backup_label`` when
    one is given, otherwise of the latest successful full backup of
    ``database``. A partition counts as recent when its VisibleVersionTime
    (from ``SHOW PARTITIONS``) is strictly later than that baseline.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.
        database: Database name (StarRocks database scope for backup).
        baseline_backup_label: Optional specific backup label to use as baseline.
        group_name: Inventory group whose tables will be considered.

    Returns:
        List of dictionaries with keys: database, table, partition_name.
        Only partitions of tables within the specified database are returned,
        and each table is inspected once even if the group lists it both
        explicitly and through a '*' wildcard.

    Raises:
        ValueError: If the requested baseline backup does not exist or did not
            finish, or if no successful full backup exists for the database.
    """
    if baseline_backup_label:
        # Escape the label so a quote/backslash cannot break out of the literal.
        label_literal = baseline_backup_label.replace("\\", "\\\\").replace("'", "\\'")
        baseline_query = f"""
        SELECT finished_at
        FROM ops.backup_history
        WHERE label = '{label_literal}'
        AND status = 'FINISHED'
        """
        baseline_rows = db.query(baseline_query)
        if not baseline_rows:
            raise ValueError(f"Baseline backup '{baseline_backup_label}' not found or not successful")
        baseline_time = baseline_rows[0][0]
    else:
        latest_backup = find_latest_full_backup(db, database)
        if not latest_backup:
            raise ValueError(f"No successful full backup found for database '{database}'. Run a full database backup first.")
        baseline_time = latest_backup['finished_at']

    # Timestamps are compared as strings below. That is chronological only
    # while both sides share the zero-padded "%Y-%m-%d %H:%M:%S" layout.
    # NOTE(review): assumes string values coming from the server already use
    # that layout — confirm against the driver in use.
    if isinstance(baseline_time, str):
        threshold_str = baseline_time
    else:
        threshold_str = baseline_time.strftime("%Y-%m-%d %H:%M:%S")

    group_tables = find_tables_by_group(db, group_name)
    db_group_tables = [t for t in group_tables if t['database'] == database]
    if not db_group_tables:
        return []

    # Expand '*' entries into concrete tables. A group may contain both a
    # wildcard and explicit tables for the same database; track what has been
    # added so no table (and hence no partition) is reported twice.
    concrete_tables = []
    seen_tables = set()
    for table_entry in db_group_tables:
        entry_db = table_entry['database']
        if table_entry['table'] == '*':
            show_tables_query = f"SHOW TABLES FROM {entry_db}"
            table_names = [row[0] for row in db.query(show_tables_query)]
        else:
            table_names = [table_entry['table']]

        for name in table_names:
            key = (entry_db, name)
            if key in seen_tables:
                continue
            seen_tables.add(key)
            concrete_tables.append({'database': entry_db, 'table': name})

    recent_partitions = []
    for table_entry in concrete_tables:
        db_name = table_entry['database']
        table_name = table_entry['table']

        show_partitions_query = f"SHOW PARTITIONS FROM {db_name}.{table_name}"
        try:
            partition_rows = db.query(show_partitions_query)
        except Exception as e:
            # Best effort: a table that cannot be inspected is logged and
            # skipped so the remaining tables are still processed.
            logger.error(f"Error showing partitions for table {db_name}.{table_name}: {e}")
            continue

        for row in partition_rows:
            # FOR SHARED NOTHING CLUSTER:
            # PartitionId, PartitionName, VisibleVersion, VisibleVersionTime, VisibleVersionHash, State, PartitionKey, Range, DistributionKey, Buckets, ReplicationNum, StorageMedium, CooldownTime, LastConsistencyCheckTime, DataSize, StorageSize, IsInMemory, RowCount, DataVersion, VersionEpoch, VersionTxnType
            partition_name = row[1]
            visible_version_time = row[3]

            if isinstance(visible_version_time, str):
                version_time_str = visible_version_time
            else:
                version_time_str = visible_version_time.strftime("%Y-%m-%d %H:%M:%S")

            if version_time_str > threshold_str:
                recent_partitions.append({
                    'database': db_name,
                    'table': table_name,
                    'partition_name': partition_name,
                })

    return recent_partitions
145
+
146
+
147
def build_incremental_backup_command(partitions: List[Dict[str, str]], repository: str, label: str, database: str) -> str:
    """Build BACKUP command for incremental backup of specific partitions.

    Partitions are grouped per table in first-seen order. A (table, partition)
    pair that appears more than once in the input is emitted only once, so the
    generated ``PARTITION (...)`` list never repeats a name.

    Args:
        partitions: List of partitions to backup, each with keys: database,
            table, partition_name.
        repository: Repository name.
        label: Backup label.
        database: Database name (StarRocks requires BACKUP to be database-specific).

    Returns:
        The BACKUP statement, or an empty string when there is nothing to back
        up for ``database``.

    Note: Filters partitions to only include those from the specified database.
    """
    if not partitions:
        return ""

    # dicts preserve insertion order, so tables come out in first-seen order.
    table_partitions: Dict[str, List[str]] = {}
    seen = set()
    for partition in partitions:
        if partition['database'] != database:
            continue
        key = (partition['table'], partition['partition_name'])
        if key in seen:
            continue
        seen.add(key)
        table_partitions.setdefault(partition['table'], []).append(partition['partition_name'])

    if not table_partitions:
        return ""

    on_clause = ",\n    ".join(
        f"TABLE {table} PARTITION ({', '.join(parts)})"
        for table, parts in table_partitions.items()
    )

    return (
        f"BACKUP DATABASE {database} SNAPSHOT {label}\n"
        f"    TO {repository}\n"
        f"    ON ({on_clause})"
    )
185
+
186
+
187
def build_full_backup_command(db, group_name: str, repository: str, label: str, database: str) -> str:
    """Build the BACKUP statement for an inventory group within one database.

    Looks up the group's tables and keeps the entries for ``database``:

    * no entries        -> empty string (nothing to back up);
    * any '*' entry     -> plain ``BACKUP DATABASE ... TO ...`` covering the
                           whole database;
    * otherwise         -> the same statement with an ``ON (TABLE ...)`` list
                           naming each table of the group.
    """
    scoped_entries = [
        entry
        for entry in find_tables_by_group(db, group_name)
        if entry['database'] == database
    ]
    if not scoped_entries:
        return ""

    # Both variants share the same leading part of the statement.
    statement_head = f"BACKUP DATABASE {database} SNAPSHOT {label}\n    TO {repository}"

    table_clauses = []
    for entry in scoped_entries:
        if entry['table'] == '*':
            # A wildcard anywhere in the group means "the whole database".
            return statement_head
        table_clauses.append(f"TABLE {entry['table']}")

    joined_clauses = ",\n    ".join(table_clauses)
    return f"{statement_head}\n    ON ({joined_clauses})"
211
+
212
+
213
def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -> None:
    """Record partition metadata for a backup in ops.backup_partitions table.

    One INSERT is issued per partition via ``db.execute``. Every value is
    escaped before being embedded as a string literal, so a quote or backslash
    in a label, table or partition name cannot break or alter the statement.

    Args:
        db: Database connection; only ``db.execute(sql)`` is used.
        label: Backup label.
        partitions: List of partitions with keys: database, table, partition_name.
            Nothing is executed when this is empty.
    """
    if not partitions:
        return

    def quote(value) -> str:
        # Render a value as a single-quoted SQL string literal.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    for partition in partitions:
        values = ", ".join(
            quote(value)
            for value in (
                label,
                partition['database'],
                partition['table'],
                partition['partition_name'],
            )
        )
        db.execute(f"""
        INSERT INTO ops.backup_partitions
        (label, database_name, table_name, partition_name)
        VALUES ({values})
        """)
230
+
231
+
232
def get_all_partitions_for_tables(db, database: str, tables: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Get all existing partitions for the specified tables.

    Only entries of ``tables`` that belong to ``database`` are considered. If
    any of them is the '*' wildcard, partitions of every table in the database
    are returned; a wildcard takes precedence over explicit table names listed
    next to it. Otherwise the result is limited to the named tables.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.
        database: Database name.
        tables: List of tables with keys: database, table.

    Returns:
        List of partitions with keys: database, table, partition_name, ordered
        by table then partition name. Empty when ``tables`` has no entry for
        ``database`` (no query is issued in that case).
    """
    if not tables:
        return []

    table_names = [t['table'] for t in tables if t['database'] == database]
    if not table_names:
        return []

    def quote(value) -> str:
        # Render a value as a single-quoted SQL string literal.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    where_conditions = [f"DB_NAME = {quote(database)}", "PARTITION_NAME IS NOT NULL"]

    # A wildcard means "every table", so a table filter is only added when
    # the list consists purely of explicit names.
    if '*' not in table_names:
        table_conditions = [f"TABLE_NAME = {quote(name)}" for name in table_names]
        where_conditions.append("(" + " OR ".join(table_conditions) + ")")

    where_clause = " AND ".join(where_conditions)

    query = f"""
    SELECT DB_NAME, TABLE_NAME, PARTITION_NAME
    FROM information_schema.partitions_meta
    WHERE {where_clause}
    ORDER BY TABLE_NAME, PARTITION_NAME
    """

    return [
        {"database": row[0], "table": row[1], "partition_name": row[2]}
        for row in db.query(query)
    ]
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+
4
def ensure_repository(db, name: str) -> None:
    """Check that a backup repository exists and reports no error.

    The repository is looked up by name; its ErrMsg column is then inspected.
    An ErrMsg that is empty, or whose text is "NULL"/"NONE" (case-insensitive,
    surrounding whitespace ignored), is treated as healthy.

    Args:
        db: Database connection
        name: Repository name to verify

    Raises:
        RuntimeError: If repository doesn't exist or has errors
    """
    repo_row = _find_repository(db, name)

    if repo_row:
        # SHOW REPOSITORIES returns: RepoId, RepoName, CreateTime, IsReadOnly, Location, Broker, ErrMsg
        err_msg = repo_row[6]
        if not err_msg:
            return
        if str(err_msg).strip().upper() in {"", "NULL", "NONE"}:
            return
        raise RuntimeError(f"Repository '{name}' has errors: {err_msg}")

    not_found_message = (
        f"Repository '{name}' not found. Please create it first using:\n"
        f" CREATE REPOSITORY {name} WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
        "For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
    )
    raise RuntimeError(not_found_message)
26
+
27
+
28
def _find_repository(db, name: str):
    """Return the first SHOW REPOSITORIES row whose name column equals ``name``.

    Empty rows are skipped. Returns None when no row matches.
    """
    matching_rows = (
        candidate
        for candidate in db.query("SHOW REPOSITORIES")
        if candidate and candidate[1] == name
    )
    return next(matching_rows, None)
35
+
36
+