starrocks-br 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starrocks_br/planner.py CHANGED
@@ -1,17 +1,17 @@
1
- from typing import List, Dict, Optional
2
1
  import datetime
3
2
  import hashlib
3
+ from typing import Optional
4
4
 
5
- from starrocks_br import logger, timezone
5
+ from starrocks_br import logger, timezone, utils
6
6
 
7
7
 
8
- def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
8
+ def find_latest_full_backup(db, database: str) -> Optional[dict[str, str]]:
9
9
  """Find the latest successful full backup for a database.
10
-
10
+
11
11
  Args:
12
12
  db: Database connection
13
13
  database: Database name to search for
14
-
14
+
15
15
  Returns:
16
16
  Dictionary with keys: label, backup_type, finished_at, or None if no full backup found.
17
17
  The finished_at value is returned as a string in the cluster timezone format.
@@ -21,125 +21,126 @@ def find_latest_full_backup(db, database: str) -> Optional[Dict[str, str]]:
21
21
  FROM ops.backup_history
22
22
  WHERE backup_type = 'full'
23
23
  AND status = 'FINISHED'
24
- AND label LIKE '{database}_%'
24
+ AND label LIKE {utils.quote_value(f"{database}_%")}
25
25
  ORDER BY finished_at DESC
26
26
  LIMIT 1
27
27
  """
28
-
28
+
29
29
  rows = db.query(query)
30
-
30
+
31
31
  if not rows:
32
32
  return None
33
-
33
+
34
34
  row = rows[0]
35
35
  finished_at = row[2]
36
-
36
+
37
37
  if isinstance(finished_at, datetime.datetime):
38
- cluster_tz = db.timezone
39
- finished_at = finished_at.strftime("%Y-%m-%d %H:%M:%S")
38
+ finished_at_normalized = timezone.normalize_datetime_to_tz(finished_at, db.timezone)
39
+ finished_at = finished_at_normalized.strftime("%Y-%m-%d %H:%M:%S")
40
40
  elif not isinstance(finished_at, str):
41
41
  finished_at = str(finished_at)
42
-
43
- return {
44
- "label": row[0],
45
- "backup_type": row[1],
46
- "finished_at": finished_at
47
- }
48
42
 
43
+ return {"label": row[0], "backup_type": row[1], "finished_at": finished_at}
49
44
 
50
- def find_tables_by_group(db, group_name: str) -> List[Dict[str, str]]:
45
+
46
+ def find_tables_by_group(db, group_name: str) -> list[dict[str, str]]:
51
47
  """Find tables belonging to a specific inventory group.
52
-
48
+
53
49
  Returns list of dictionaries with keys: database, table.
54
50
  Supports '*' table wildcard which signifies all tables in a database.
55
51
  """
56
52
  query = f"""
57
53
  SELECT database_name, table_name
58
54
  FROM ops.table_inventory
59
- WHERE inventory_group = '{group_name}'
55
+ WHERE inventory_group = {utils.quote_value(group_name)}
60
56
  ORDER BY database_name, table_name
61
57
  """
62
58
  rows = db.query(query)
63
- return [
64
- {"database": row[0], "table": row[1]} for row in rows
65
- ]
59
+ return [{"database": row[0], "table": row[1]} for row in rows]
66
60
 
67
61
 
68
- def find_recent_partitions(db, database: str, baseline_backup_label: Optional[str] = None, *, group_name: str) -> List[Dict[str, str]]:
62
+ def find_recent_partitions(
63
+ db, database: str, baseline_backup_label: Optional[str] = None, *, group_name: str
64
+ ) -> list[dict[str, str]]:
69
65
  """Find partitions updated since baseline for tables in the given inventory group.
70
-
66
+
71
67
  Args:
72
68
  db: Database connection
73
69
  database: Database name (StarRocks database scope for backup)
74
70
  baseline_backup_label: Optional specific backup label to use as baseline.
75
71
  group_name: Inventory group whose tables will be considered
76
-
72
+
77
73
  Returns list of dictionaries with keys: database, table, partition_name.
78
74
  Only partitions of tables within the specified database are returned.
79
75
  """
80
76
  cluster_tz = db.timezone
81
-
77
+
82
78
  if baseline_backup_label:
83
79
  baseline_query = f"""
84
80
  SELECT finished_at
85
81
  FROM ops.backup_history
86
- WHERE label = '{baseline_backup_label}'
82
+ WHERE label = {utils.quote_value(baseline_backup_label)}
87
83
  AND status = 'FINISHED'
88
84
  """
89
85
  baseline_rows = db.query(baseline_query)
90
86
  if not baseline_rows:
91
- raise ValueError(f"Baseline backup '{baseline_backup_label}' not found or not successful")
87
+ raise ValueError(
88
+ f"Baseline backup '{baseline_backup_label}' not found or not successful"
89
+ )
92
90
  baseline_time_raw = baseline_rows[0][0]
93
91
  else:
94
92
  latest_backup = find_latest_full_backup(db, database)
95
93
  if not latest_backup:
96
- raise ValueError(f"No successful full backup found for database '{database}'. Run a full database backup first.")
97
- baseline_time_raw = latest_backup['finished_at']
98
-
94
+ raise ValueError(
95
+ f"No successful full backup found for database '{database}'. Run a full database backup first."
96
+ )
97
+ baseline_time_raw = latest_backup["finished_at"]
98
+
99
99
  if isinstance(baseline_time_raw, datetime.datetime):
100
100
  baseline_time_str = baseline_time_raw.strftime("%Y-%m-%d %H:%M:%S")
101
101
  elif isinstance(baseline_time_raw, str):
102
102
  baseline_time_str = baseline_time_raw
103
103
  else:
104
104
  baseline_time_str = str(baseline_time_raw)
105
-
105
+
106
106
  baseline_dt = timezone.parse_datetime_with_tz(baseline_time_str, cluster_tz)
107
-
107
+
108
108
  group_tables = find_tables_by_group(db, group_name)
109
109
 
110
110
  if not group_tables:
111
111
  return []
112
112
 
113
- db_group_tables = [t for t in group_tables if t['database'] == database]
113
+ db_group_tables = [t for t in group_tables if t["database"] == database]
114
114
 
115
115
  if not db_group_tables:
116
116
  return []
117
-
117
+
118
118
  concrete_tables = []
119
119
  for table_entry in db_group_tables:
120
- if table_entry['table'] == '*':
121
- show_tables_query = f"SHOW TABLES FROM {table_entry['database']}"
120
+ if table_entry["table"] == "*":
121
+ show_tables_query = (
122
+ f"SHOW TABLES FROM {utils.quote_identifier(table_entry['database'])}"
123
+ )
122
124
  tables_rows = db.query(show_tables_query)
123
125
  for row in tables_rows:
124
- concrete_tables.append({
125
- 'database': table_entry['database'],
126
- 'table': row[0]
127
- })
126
+ concrete_tables.append({"database": table_entry["database"], "table": row[0]})
128
127
  else:
129
128
  concrete_tables.append(table_entry)
130
-
129
+
131
130
  recent_partitions = []
132
131
  for table_entry in concrete_tables:
133
- db_name = table_entry['database']
134
- table_name = table_entry['table']
135
-
136
- show_partitions_query = f"SHOW PARTITIONS FROM {db_name}.{table_name}"
132
+ db_name = table_entry["database"]
133
+ table_name = table_entry["table"]
134
+
135
+ show_partitions_query = (
136
+ f"SHOW PARTITIONS FROM {utils.build_qualified_table_name(db_name, table_name)}"
137
+ )
137
138
  try:
138
139
  partition_rows = db.query(show_partitions_query)
139
140
  except Exception as e:
140
141
  logger.error(f"Error showing partitions for table {db_name}.{table_name}: {e}")
141
142
  continue
142
-
143
+
143
144
  for row in partition_rows:
144
145
  # FOR SHARED NOTHING CLUSTER:
145
146
  # PartitionId, PartitionName, VisibleVersion, VisibleVersionTime, VisibleVersionHash, State, PartitionKey, Range, DistributionKey, Buckets, ReplicationNum, StorageMedium, CooldownTime, LastConsistencyCheckTime, DataSize, StorageSize, IsInMemory, RowCount, DataVersion, VersionEpoch, VersionTxnType
@@ -152,86 +153,90 @@ def find_recent_partitions(db, database: str, baseline_backup_label: Optional[st
152
153
  visible_version_time_str = visible_version_time
153
154
  else:
154
155
  visible_version_time_str = str(visible_version_time)
155
-
156
- visible_version_dt = timezone.parse_datetime_with_tz(visible_version_time_str, cluster_tz)
157
-
156
+
157
+ visible_version_dt = timezone.parse_datetime_with_tz(
158
+ visible_version_time_str, cluster_tz
159
+ )
160
+
158
161
  if visible_version_dt > baseline_dt:
159
- recent_partitions.append({
160
- 'database': db_name,
161
- 'table': table_name,
162
- 'partition_name': partition_name
163
- })
164
-
162
+ recent_partitions.append(
163
+ {"database": db_name, "table": table_name, "partition_name": partition_name}
164
+ )
165
+
165
166
  return recent_partitions
166
167
 
167
168
 
168
- def build_incremental_backup_command(partitions: List[Dict[str, str]], repository: str, label: str, database: str) -> str:
169
+ def build_incremental_backup_command(
170
+ partitions: list[dict[str, str]], repository: str, label: str, database: str
171
+ ) -> str:
169
172
  """Build BACKUP command for incremental backup of specific partitions.
170
-
173
+
171
174
  Args:
172
175
  partitions: List of partitions to backup
173
176
  repository: Repository name
174
177
  label: Backup label
175
178
  database: Database name (StarRocks requires BACKUP to be database-specific)
176
-
179
+
177
180
  Note: Filters partitions to only include those from the specified database.
178
181
  """
179
182
  if not partitions:
180
183
  return ""
181
-
182
- db_partitions = [p for p in partitions if p['database'] == database]
183
-
184
+
185
+ db_partitions = [p for p in partitions if p["database"] == database]
186
+
184
187
  if not db_partitions:
185
188
  return ""
186
-
189
+
187
190
  table_partitions = {}
188
191
  for partition in db_partitions:
189
- table_name = partition['table']
192
+ table_name = partition["table"]
190
193
  if table_name not in table_partitions:
191
194
  table_partitions[table_name] = []
192
- table_partitions[table_name].append(partition['partition_name'])
193
-
195
+ table_partitions[table_name].append(partition["partition_name"])
196
+
194
197
  on_clauses = []
195
198
  for table, parts in table_partitions.items():
196
- partitions_str = ", ".join(parts)
197
- on_clauses.append(f"TABLE {table} PARTITION ({partitions_str})")
198
-
199
+ partitions_str = ", ".join(utils.quote_identifier(p) for p in parts)
200
+ on_clauses.append(f"TABLE {utils.quote_identifier(table)} PARTITION ({partitions_str})")
201
+
199
202
  on_clause = ",\n ".join(on_clauses)
200
-
201
- command = f"""BACKUP DATABASE {database} SNAPSHOT {label}
202
- TO {repository}
203
+
204
+ command = f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
205
+ TO {utils.quote_identifier(repository)}
203
206
  ON ({on_clause})"""
204
-
207
+
205
208
  return command
206
209
 
207
210
 
208
- def build_full_backup_command(db, group_name: str, repository: str, label: str, database: str) -> str:
211
+ def build_full_backup_command(
212
+ db, group_name: str, repository: str, label: str, database: str
213
+ ) -> str:
209
214
  """Build BACKUP command for an inventory group.
210
-
215
+
211
216
  If the group contains '*' for any entry in the target database, generate a
212
217
  simple BACKUP DATABASE command. Otherwise, generate ON (TABLE ...) list for
213
218
  the specific tables within the database.
214
219
  """
215
220
  tables = find_tables_by_group(db, group_name)
216
221
 
217
- db_entries = [t for t in tables if t['database'] == database]
222
+ db_entries = [t for t in tables if t["database"] == database]
218
223
  if not db_entries:
219
224
  return ""
220
225
 
221
- if any(t['table'] == '*' for t in db_entries):
222
- return f"""BACKUP DATABASE {database} SNAPSHOT {label}
223
- TO {repository}"""
226
+ if any(t["table"] == "*" for t in db_entries):
227
+ return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
228
+ TO {utils.quote_identifier(repository)}"""
224
229
 
225
230
  on_clauses = []
226
231
  for t in db_entries:
227
- on_clauses.append(f"TABLE {t['table']}")
232
+ on_clauses.append(f"TABLE {utils.quote_identifier(t['table'])}")
228
233
  on_clause = ",\n ".join(on_clauses)
229
- return f"""BACKUP DATABASE {database} SNAPSHOT {label}
230
- TO {repository}
234
+ return f"""BACKUP DATABASE {utils.quote_identifier(database)} SNAPSHOT {utils.quote_identifier(label)}
235
+ TO {utils.quote_identifier(repository)}
231
236
  ON ({on_clause})"""
232
237
 
233
238
 
234
- def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -> None:
239
+ def record_backup_partitions(db, label: str, partitions: list[dict[str, str]]) -> None:
235
240
  """Record partition metadata for a backup in ops.backup_partitions table.
236
241
 
237
242
  Args:
@@ -243,62 +248,59 @@ def record_backup_partitions(db, label: str, partitions: List[Dict[str, str]]) -
243
248
  return
244
249
 
245
250
  for partition in partitions:
246
- composite_key = f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
247
- key_hash = hashlib.md5(composite_key.encode('utf-8')).hexdigest()
251
+ composite_key = (
252
+ f"{label}|{partition['database']}|{partition['table']}|{partition['partition_name']}"
253
+ )
254
+ key_hash = hashlib.md5(composite_key.encode("utf-8")).hexdigest()
248
255
 
249
256
  db.execute(f"""
250
257
  INSERT INTO ops.backup_partitions
251
258
  (key_hash, label, database_name, table_name, partition_name)
252
- VALUES ('{key_hash}', '{label}', '{partition['database']}', '{partition['table']}', '{partition['partition_name']}')
259
+ VALUES ({utils.quote_value(key_hash)}, {utils.quote_value(label)}, {utils.quote_value(partition["database"])}, {utils.quote_value(partition["table"])}, {utils.quote_value(partition["partition_name"])})
253
260
  """)
254
261
 
255
262
 
256
- def get_all_partitions_for_tables(db, database: str, tables: List[Dict[str, str]]) -> List[Dict[str, str]]:
263
+ def get_all_partitions_for_tables(
264
+ db, database: str, tables: list[dict[str, str]]
265
+ ) -> list[dict[str, str]]:
257
266
  """Get all existing partitions for the specified tables.
258
-
267
+
259
268
  Args:
260
269
  db: Database connection
261
270
  database: Database name
262
271
  tables: List of tables with keys: database, table
263
-
272
+
264
273
  Returns:
265
274
  List of partitions with keys: database, table, partition_name
266
275
  """
267
276
  if not tables:
268
277
  return []
269
-
270
- db_tables = [t for t in tables if t['database'] == database]
278
+
279
+ db_tables = [t for t in tables if t["database"] == database]
271
280
  if not db_tables:
272
281
  return []
273
-
274
- where_conditions = [f"DB_NAME = '{database}'", "PARTITION_NAME IS NOT NULL"]
275
-
282
+
283
+ where_conditions = [f"DB_NAME = {utils.quote_value(database)}", "PARTITION_NAME IS NOT NULL"]
284
+
276
285
  table_conditions = []
277
286
  for table in db_tables:
278
- if table['table'] == '*':
287
+ if table["table"] == "*":
279
288
  pass
280
289
  else:
281
- table_conditions.append(f"TABLE_NAME = '{table['table']}'")
282
-
290
+ table_conditions.append(f"TABLE_NAME = {utils.quote_value(table['table'])}")
291
+
283
292
  if table_conditions:
284
293
  where_conditions.append("(" + " OR ".join(table_conditions) + ")")
285
-
294
+
286
295
  where_clause = " AND ".join(where_conditions)
287
-
296
+
288
297
  query = f"""
289
298
  SELECT DB_NAME, TABLE_NAME, PARTITION_NAME
290
299
  FROM information_schema.partitions_meta
291
300
  WHERE {where_clause}
292
301
  ORDER BY TABLE_NAME, PARTITION_NAME
293
302
  """
294
-
303
+
295
304
  rows = db.query(query)
296
-
297
- return [
298
- {
299
- "database": row[0],
300
- "table": row[1],
301
- "partition_name": row[2]
302
- }
303
- for row in rows
304
- ]
305
+
306
+ return [{"database": row[0], "table": row[1], "partition_name": row[2]} for row in rows]
@@ -3,11 +3,11 @@ from __future__ import annotations
3
3
 
4
4
  def ensure_repository(db, name: str) -> None:
5
5
  """Verify that the specified repository exists and is accessible.
6
-
6
+
7
7
  Args:
8
8
  db: Database connection
9
9
  name: Repository name to verify
10
-
10
+
11
11
  Raises:
12
12
  RuntimeError: If repository doesn't exist or has errors
13
13
  """
@@ -18,7 +18,7 @@ def ensure_repository(db, name: str) -> None:
18
18
  f" CREATE REPOSITORY {name} WITH BROKER ON LOCATION '...' PROPERTIES(...)\n"
19
19
  f"For examples, see: https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/backup_restore/CREATE_REPOSITORY/"
20
20
  )
21
-
21
+
22
22
  # SHOW REPOSITORIES returns: RepoId, RepoName, CreateTime, IsReadOnly, Location, Broker, ErrMsg
23
23
  err_msg = existing[6]
24
24
  if err_msg and str(err_msg).strip().upper() not in {"", "NULL", "NONE"}:
@@ -32,5 +32,3 @@ def _find_repository(db, name: str):
32
32
  if row and row[1] == name:
33
33
  return row
34
34
  return None
35
-
36
-