starrocks-br 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starrocks_br/planner.py CHANGED
@@ -1,17 +1,16 @@
1
- from typing import List, Dict, Optional
2
1
  import datetime
3
2
  import hashlib
4
3
 
5
- from starrocks_br import logger, timezone
4
+ from starrocks_br import logger, timezone, utils
6
5
 
7
6
 
8
- def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
7
+ def find_latest_full_backup(db, database: str) -> dict[str, str] | None:
9
8
  """Find the latest successful full backup for a database.
10
-
9
+
11
10
  Args:
12
11
  db: Database connection
13
12
  database: Database name to search for
14
-
13
+
15
14
  Returns:
16
15
  Dictionary with keys: label, backup_type, finished_at, or None if no full backup found.
17
16
  The finished_at value is returned as a string in the cluster timezone format.
@@ -21,125 +20,126 @@ def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
21
20
  FROM ops.backup_history
22
21
  WHERE backup_type = 'full'
23
22
  AND status = 'FINISHED'
24
- AND label LIKE '{database}_%'
23
+ AND label LIKE {utils.quote_value(f"{database}_%")}
25
24
  ORDER BY finished_at DESC
26
25
  LIMIT 1
27
26
  """
28
-
27
+
29
28
  rows = db.query(query)
30
-
29
+
31
30
  if not rows:
32
31
  return None
33
-
32
+
34
33
  row = rows[0]
35
34
  finished_at = row[2]
36
-
35
+
37
36
  if isinstance(finished_at, datetime.datetime):
38
- cluster_tz = db.timezone
39
- finished_at = finished_at.strftime("%Y-%m-%d %H:%M:%S")
37
+ finished_at_normalized = timezone.normalize_datetime_to_tz(finished_at, db.timezone)
38
+ finished_at = finished_at_normalized.strftime("%Y-%m-%d %H:%M:%S")
40
39
  elif not isinstance(finished_at, str):
41
40
  finished_at = str(finished_at)
42
-
43
- return {
44
- "label": row[0],
45
- "backup_type": row[1],
46
- "finished_at": finished_at
47
- }
48
41
 
42
+ return {"label": row[0], "backup_type": row[1], "finished_at": finished_at}
49
43
 
50
- def find_tables_by_group(db, group_name: str) -> List[Dict[str, str]]:
44
+
45
+ def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
51
46
  """Find tables belonging to a specific inventory group.
52
-
47
+
53
48
  Returns list of dictionaries with keys: database, table.
54
49
  Supports '*' table wildcard which signifies all tables in a database.
55
50
  """
56
51
  query = f"""
57
52
  SELECT database_name, table_name
58
53
  FROM ops.table_inventory
59
- WHERE inventory_group = '{group_name}'
54
+ WHERE inventory_group = {utils.quote_value(group_name)}
60
55
  ORDER BY database_name, table_name
61
56
  """
62
57
  rows = db.query(query)
63
- return [
64
- {"database": row[0], "table": row[1]} for row in rows
65
- ]
58
+ return [{"database": row[0], "table": row[1]} for row in rows]
66
59
 
67
60
 
68
- def find_recent_partitions(db, database: str, baseline_backup_label: Optional[str] = None, *, group_name: str) -> List[Dict[str, str]]:
61
+ def find_recent_partitions(
62
+ db, database: str, baseline_backup_label: str | None = None, *, group_name: str
63
+ ) -> list[dict[str, str]]:
69
64
  """Find partitions updated since baseline for tables in the given inventory group.
70
-
65
+
71
66
  Args:
72
67
  db: Database connection
73
68
  database: Database name (StarRocks database scope for backup)
74
69
  baseline_backup_label: Optional specific backup label to use as baseline.
75
70
  group_name: Inventory group whose tables will be considered
76
-
71
+
77
72
  Returns list of dictionaries with keys: database, table, partition_name.
78
73
  Only partitions of tables within the specified database are returned.
79
74
  """
80
75
  cluster_tz = db.timezone
81
-
76
+
82
77
  if baseline_backup_label:
83
78
  baseline_query = f"""
84
79
  SELECT finished_at
85
80
  FROM ops.backup_history
86
- WHERE label = '{baseline_backup_label}'
81
+ WHERE label = {utils.quote_value(baseline_backup_label)}
87
82
  AND status = 'FINISHED'
88
83
  """
89
84
  baseline_rows = db.query(baseline_query)
90
85
  if not baseline_rows:
91
- raise ValueError(f"Baseline backup '{baseline_backup_label}' not found or not successful")
86
+ raise ValueError(
87
+ f"Baseline backup '{baseline_backup_label}' not found or not successful"
88
+ )
92
89
  baseline_time_raw = baseline_rows[0][0]
93
90
  else:
94
91
  latest_backup = find_latest_full_backup(db, database)
95
92
  if not latest_backup:
96
- raise ValueError(f"No successful full backup found for database '{database}'. Run a full database backup first.")
97
- baseline_time_raw = latest_backup['finished_at']
98
-
93
+ raise ValueError(
94
+ f"No successful full backup found for database '{database}'. Run a full database backup first."
95
+ )
96
+ baseline_time_raw = latest_backup["finished_at"]
97
+
99
98
  if isinstance(baseline_time_raw, datetime.datetime):
100
99
  baseline_time_str = baseline_time_raw.strftime("%Y-%m-%d %H:%M:%S")
101
100
  elif isinstance(baseline_time_raw, str):
102
101
  baseline_time_str = baseline_time_raw
103
102
  else:
104
103
  baseline_time_str = str(baseline_time_raw)
105
-
104
+
106
105
  baseline_dt = timezone.parse_datetime_with_tz(baseline_time_str, cluster_tz)
107
-
106
+
108
107
  group_tables = find_tables_by_group(db, group_name)
109
108
 
110
109
  if not group_tables:
111
110
  return []
112
111
 
113
- db_group_tables = [t for t in group_tables if t['database'] == database]
112
+ db_group_tables = [t for t in group_tables if t["database"] == database]
114
113
 
115
114
  if not db_group_tables:
116
115
  return []
117
-
116
+
118
117
  concrete_tables = []
119
118
  for table_entry in db_group_tables:
120
- if table_entry['table'] == '*':
121
- show_tables_query = f"SHOW TABLES FROM {table_entry['database']}"
119
+ if table_entry["table"] == "*":
120
+ show_tables_query = (
121
+ f"SHOW TABLES FROM {utils.quote_identifier(table_entry['database'])}"
122
+ )
122
123
  tables_rows = db.query(show_tables_query)
123
124
  for row in tables_rows:
124
- concrete_tables.append({
125
- 'database': table_entry['database'],
126
- 'table': row[0]
127
- })
125
+ concrete_tables.append({"database": table_entry["database"], "table": row[0]})
128
126
  else:
129
127
  concrete_tables.append(table_entry)
130
-
128
+
131
129
  recent_partitions = []
132
130
  for table_entry in concrete_tables:
133
- db_name = table_entry['database']
134
- table_name = table_entry['table']
135
-
136
- show_partitions_query = f"SHOW PARTITIONS FROM {db_name}.{table_name}"
131
+ db_name = table_entry["database"]
132
+ table_name = table_entry["table"]
133
+
134
+ show_partitions_query = (
135
+ f"SHOW PARTITIONS FROM {utils.build_qualified_table_name(db_name, table_name)}"
136
+ )
137
137
  try:
138
138
  partition_rows = db.query(show_partitions_query)
139
139
  except Exception as e:
140
140
  logger.error(f"Error showing partitions for table {db_name}.{table_name}: {e}")
141
141
  continue
142
-
142
+
143
143
  for row in partition_rows:
144
144
  # FOR SHARED NOTHING CLUSTER:
145
145
  # PartitionId, PartitionName, VisibleVersion, VisibleVersionTime, VisibleVersionHash, State, PartitionKey, Range, DistributionKey, Buckets, ReplicationNum, StorageMedium, CooldownTime, LastConsistencyCheckTime, DataSize, StorageSize, IsInMemory, RowCount, DataVersion, VersionEpoch, VersionTxnType
@@ -152,86 +152,90 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
152
152
  visible_version_time_str = visible_version_time
153
153
  else:
154
154
  visible_version_time_str = str(visible_version_time)
155
-
156
- visible_version_dt = timezone.parse_datetime_with_tz(visible_version_time_str, cluster_tz)
157
-
155
+
156
+ visible_version_dt = timezone.parse_datetime_with_tz(
157
+ visible_version_time_str, cluster_tz
158
+ )
159
+
158
160
  if visible_version_dt > baseline_dt:
159
- recent_partitions.append({
160
- 'database': db_name,
161
- 'table': table_name,
162
- 'partition_name': partition_name
163
- })
164
-
161
+ recent_partitions.append(
162
+ {"database": db_name, "table": table_name, "partition_name": partition_name}
163
+ )
164
+
165
165
  return recent_partitions
166
166
 
167
167
 
168
- def build_incremental_backup_command(partitions: List[Dict[str, str]], repository: str, label: str, database: str) -> str:
168
+ def build_incremental_backup_command(
169
+ partitions: list[dict[str, str]], repository: str, label: str, database: str
170
+ ) -> str:
169
171
  """Build BACKUP command for incremental backup of specific partitions.
170
-
172
+
171
173
  Args:
172
174
  partitions: List of partitions to backup
173
175
  repository: Repository name
174
176
  label: Backup label
175
177
  database: Database name (StarRocks requires BACKUP to be database-specific)
176
-
178
+
177
179
  Note: Filters partitions to only include those from the specified database.
178
180
  """
179
181
  if not partitions:
180
182
  return ""
181
-
182
- db_partitions = [p for p in partitions if p['database'] == database]
183
-
183
+
184
+ db_partitions = [p for p in partitions if p["database"] == database]
185
+
184
186
  if not db_partitions:
185
187
  return ""
186
-
188
+
187
189
  table_partitions = {}
188
190
  for partition in db_partitions:
189
- table_name = partition['table']
191
+ table_name = partition["table"]
190
192
  if table_name not in table_partitions:
191
193
  table_partitions[table_name] = []
192
- table_partitions[table_name].append(partition['partition_name'])
193
-
194
+ table_partitions[table_name].append(partition["partition_name"])
195
+
194
196
  on_clauses = []
195
197
  for table, parts in table_partitions.items():
196
- partitions_str = ", ".join(parts)
197
- on_clauses.append(f"TABLE {table} PARTITION ({partitions_str})")
198
-
198
+ partitions_str = ", ".join(utils.quote_identifier(p) for p in parts)
199
+ on_clauses.append(f"TABLE {utils.quote_identifier(table)} PARTITION ({partitions_str})")
200
+
199
201
  on_clause = ",\n ".join(on_clauses)
200
-
201
- command = f"""BACKUP DATABASE {database} SNAPSHOT {label}
202
- TO {repository}
202
+
203
+ command = f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
204
+ TO {utils.quote_identifier(repository)}
203
205
  ON ({on_clause})"""
204
-
206
+
205
207
  return command
206
208
 
207
209
 
208
- def build_full_backup_command(db, group_name: str, repository: str, label: str, database: str) -> str:
210
+ def build_full_backup_command(
211
+ db, group_name: str, repository: str, label: str, database: str
212
+ ) -> str:
209
213
  """Build BACKUP command for an inventory group.
210
-
214
+
211
215
  If the group contains '*' for any entry in the target database, generate a
212
216
  simple BACKUP DATABASE command. Otherwise, generate ON (TABLE ...) list for
213
217
  the specific tables within the database.
214
218
  """
215
219
  tables = find_tables_by_group(db, group_name)
216
220
 
217
- db_entries = [t for t in tables if t['database'] == database]
221
+ db_entries = [t for t in tables if t["database"] == database]
218
222
  if not db_entries:
219
223
  return ""
220
224
 
221
- if any(t['table'] == '*' for t in db_entries):
222
- return f"""BACKUP DATABASE {database} SNAPSHOT {label}
223
- TO {repository}"""
225
+ if any(t["table"] == "*" for t in db_entries):
226
+ return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
227
+ TO {utils.quote_identifier(repository)}"""
224
228
 
225
229
  on_clauses = []
226
230
  for t in db_entries:
227
- on_clauses.append(f"TABLE {t['table']}")
231
+ on_clauses.append(f"TABLE {utils.quote_identifier(t['table'])}")
228
232
  on_clause = ",\n ".join(on_clauses)
229
- return f"""BACKUP DATABASE {database} SNAPSHOT {label}
230
- TO {repository}
233
+ return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
234
+ TO {utils.quote_identifier(repository)}
231
235
  ON ({on_clause})"""
232
236
 
233
237
 
234
- def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -> None:
238
+ def record_backup_partitions(db, label: str, partitions: list[dict[str, str]]) -> None:
235
239
  """Record partition metadata for a backup in ops.backup_partitions table.
236
240
 
237
241
  Args:
@@ -243,62 +247,59 @@ def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -
243
247
  return
244
248
 
245
249
  for partition in partitions:
246
- composite_key = f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
247
- key_hash = hashlib.md5(composite_key.encode('utf-8')).hexdigest()
250
+ composite_key = (
251
+ f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
252
+ )
253
+ key_hash = hashlib.md5(composite_key.encode("utf-8")).hexdigest()
248
254
 
249
255
  db.execute(f"""
250
256
  INSERT INTO ops.backup_partitions
251
257
  (key_hash, label, database_name, table_name, partition_name)
252
- VALUES ('{key_hash}', '{label}', '{partition['database']}', '{partition['table']}', '{partition['partition_name']}')
258
+ VALUES ({utils.quote_value(key_hash)}, {utils.quote_value(label)}, {utils.quote_value(partition["database"])}, {utils.quote_value(partition["table"])}, {utils.quote_value(partition["partition_name"])})
253
259
  """)
254
260
 
255
261
 
256
- def get_all_partitions_for_tables(db, database: str, tables: List[Dict[str, str]]) -> List[Dict[str, str]]:
262
+ def get_all_partitions_for_tables(
263
+ db, database: str, tables: list[dict[str, str]]
264
+ ) -> list[dict[str, str]]:
257
265
  """Get all existing partitions for the specified tables.
258
-
266
+
259
267
  Args:
260
268
  db: Database connection
261
269
  database: Database name
262
270
  tables: List of tables with keys: database, table
263
-
271
+
264
272
  Returns:
265
273
  List of partitions with keys: database, table, partition_name
266
274
  """
267
275
  if not tables:
268
276
  return []
269
-
270
- db_tables = [t for t in tables if t['database'] == database]
277
+
278
+ db_tables = [t for t in tables if t["database"] == database]
271
279
  if not db_tables:
272
280
  return []
273
-
274
- where_conditions = [f"DB_NAME = '{database}'", "PARTITION_NAME IS NOT NULL"]
275
-
281
+
282
+ where_conditions = [f"DB_NAME = {utils.quote_value(database)}", "PARTITION_NAME IS NOT NULL"]
283
+
276
284
  table_conditions = []
277
285
  for table in db_tables:
278
- if table['table'] == '*':
286
+ if table["table"] == "*":
279
287
  pass
280
288
  else:
281
- table_conditions.append(f"TABLE_NAME = '{table['table']}'")
282
-
289
+ table_conditions.append(f"TABLE_NAME = {utils.quote_value(table['table'])}")
290
+
283
291
  if table_conditions:
284
292
  where_conditions.append("(" + " OR ".join(table_conditions) + ")")
285
-
293
+
286
294
  where_clause = " AND ".join(where_conditions)
287
-
295
+
288
296
  query = f"""
289
297
  SELECT DB_NAME, TABLE_NAME, PARTITION_NAME
290
298
  FROM information_schema.partitions_meta
291
299
  WHERE {where_clause}
292
300
  ORDER BY TABLE_NAME, PARTITION_NAME
293
301
  """
294
-
302
+
295
303
  rows = db.query(query)
296
-
297
- return [
298
- {
299
- "database": row[0],
300
- "table": row[1],
301
- "partition_name": row[2]
302
- }
303
- for row in rows
304
- ]
304
+
305
+ return [{"database": row[0], "table": row[1], "partition_name": row[2]} for row in rows]
@@ -3,11 +3,11 @@ from __future__ import annotations
3
3
 
4
4
  def ensure_repository(db, name: str) -> None:
5
5
  """Verify that the specified repository exists and is accessible.
6
-
6
+
7
7
  Args:
8
8
  db: Database connection
9
9
  name: Repository name to verify
10
-
10
+
11
11
  Raises:
12
12
  RuntimeError: If repository doesn't exist or has errors
13
13
  """
@@ -18,7 +18,7 @@ def ensure_repository(db, name: str) -> None:
18
18
  f" CREATE REPOSITORY {name} WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
19
19
  f"For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
20
20
  )
21
-
21
+
22
22
  # SHOW REPOSITORIES returns: RepoId, RepoName, CreateTime, IsReadOnly, Location, Broker, ErrMsg
23
23
  err_msg = existing[6]
24
24
  if err_msg and str(err_msg).strip().upper() not in {"", "NULL", "NONE"}:
@@ -32,5 +32,3 @@ def _find_repository(db, name: str):
32
32
  if row and row[1] == name:
33
33
  return row
34
34
  return None
35
-
36
-