starrocks-br 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starrocks_br/schema.py CHANGED
@@ -1,32 +1,33 @@
1
1
  from . import logger
2
2
 
3
+
3
4
  def initialize_ops_schema(db) -> None:
4
5
  """Initialize the ops database and all required control tables.
5
-
6
+
6
7
  Creates empty ops tables. Does NOT populate with sample data.
7
8
  Users must manually insert their table inventory records.
8
9
  """
9
-
10
+
10
11
  logger.info("Creating ops database...")
11
12
  db.execute("CREATE DATABASE IF NOT EXISTS ops")
12
13
  logger.success("ops database created")
13
-
14
+
14
15
  logger.info("Creating ops.table_inventory...")
15
16
  db.execute(get_table_inventory_schema())
16
17
  logger.success("ops.table_inventory created")
17
-
18
+
18
19
  logger.info("Creating ops.backup_history...")
19
20
  db.execute(get_backup_history_schema())
20
21
  logger.success("ops.backup_history created")
21
-
22
+
22
23
  logger.info("Creating ops.restore_history...")
23
24
  db.execute(get_restore_history_schema())
24
25
  logger.success("ops.restore_history created")
25
-
26
+
26
27
  logger.info("Creating ops.run_status...")
27
28
  db.execute(get_run_status_schema())
28
29
  logger.success("ops.run_status created")
29
-
30
+
30
31
  logger.info("Creating ops.backup_partitions...")
31
32
  db.execute(get_backup_partitions_schema())
32
33
  logger.success("ops.backup_partitions created")
@@ -36,26 +37,26 @@ def initialize_ops_schema(db) -> None:
36
37
 
37
38
  def ensure_ops_schema(db) -> bool:
38
39
  """Ensure ops schema exists, creating it if necessary.
39
-
40
+
40
41
  Returns True if schema was created, False if it already existed.
41
42
  This is called automatically before backup/restore operations.
42
43
  """
43
44
  try:
44
45
  result = db.query("SHOW DATABASES LIKE 'ops'")
45
-
46
+
46
47
  if not result:
47
48
  initialize_ops_schema(db)
48
49
  return True
49
-
50
+
50
51
  db.execute("USE ops")
51
52
  tables_result = db.query("SHOW TABLES")
52
-
53
+
53
54
  if not tables_result or len(tables_result) < 5:
54
55
  initialize_ops_schema(db)
55
56
  return True
56
-
57
+
57
58
  return False
58
-
59
+
59
60
  except Exception:
60
61
  initialize_ops_schema(db)
61
62
  return True
@@ -144,4 +145,4 @@ def get_backup_partitions_schema() -> str:
144
145
  PRIMARY KEY (key_hash)
145
146
  COMMENT "Tracks every partition included in a backup snapshot."
146
147
  DISTRIBUTED BY HASH(key_hash)
147
- """
148
+ """
starrocks_br/timezone.py CHANGED
@@ -1,14 +1,13 @@
1
1
  import datetime
2
- from typing import Union
3
2
  from zoneinfo import ZoneInfo
4
3
 
5
4
 
6
5
  def get_current_time_in_cluster_tz(cluster_tz: str) -> str:
7
6
  """Get current time formatted in cluster timezone.
8
-
7
+
9
8
  Args:
10
9
  cluster_tz: Timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00')
11
-
10
+
12
11
  Returns:
13
12
  Formatted datetime string in 'YYYY-MM-DD HH:MM:SS' format in the cluster timezone
14
13
  """
@@ -19,58 +18,58 @@ def get_current_time_in_cluster_tz(cluster_tz: str) -> str:
19
18
 
20
19
  def parse_datetime_with_tz(dt_str: str, tz: str) -> datetime.datetime:
21
20
  """Parse datetime string assuming the given timezone.
22
-
21
+
23
22
  Args:
24
23
  dt_str: Datetime string in 'YYYY-MM-DD HH:MM:SS' format
25
24
  tz: Timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00')
26
-
25
+
27
26
  Returns:
28
27
  Timezone-aware datetime object
29
28
  """
30
29
  timezone = _get_timezone(tz)
31
-
30
+
32
31
  dt = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
33
32
  dt = dt.replace(tzinfo=timezone)
34
-
33
+
35
34
  return dt
36
35
 
37
36
 
38
37
  def normalize_datetime_to_tz(dt: datetime.datetime, target_tz: str) -> datetime.datetime:
39
38
  """Convert datetime to target timezone.
40
-
39
+
41
40
  Args:
42
41
  dt: Datetime object (timezone-aware or naive)
43
42
  target_tz: Target timezone string (e.g., 'Asia/Shanghai', 'UTC', '+08:00')
44
-
43
+
45
44
  Returns:
46
45
  Timezone-aware datetime object in the target timezone
47
46
  """
48
47
  timezone = _get_timezone(target_tz)
49
-
48
+
50
49
  if dt.tzinfo is None:
51
50
  dt = dt.replace(tzinfo=datetime.timezone.utc)
52
-
51
+
53
52
  dt = dt.astimezone(timezone)
54
-
53
+
55
54
  return dt
56
55
 
57
56
 
58
- def _get_timezone(tz_str: str) -> Union[ZoneInfo, datetime.timezone]:
57
+ def _get_timezone(tz_str: str) -> ZoneInfo | datetime.timezone:
59
58
  """Get timezone object from timezone string.
60
-
59
+
61
60
  Handles both named timezones (e.g., 'Asia/Shanghai') and offset strings (e.g., '+08:00', '-05:00').
62
-
61
+
63
62
  Args:
64
63
  tz_str: Timezone string
65
-
64
+
66
65
  Returns:
67
66
  ZoneInfo or timezone object
68
67
  """
69
68
  tz_str = tz_str.strip()
70
-
69
+
71
70
  if tz_str.upper() == "UTC" or tz_str == "+00:00" or tz_str == "-00:00":
72
71
  return ZoneInfo("UTC")
73
-
72
+
74
73
  if tz_str.startswith(("+", "-")):
75
74
  try:
76
75
  hours, minutes = _parse_offset(tz_str)
@@ -78,7 +77,7 @@ def _get_timezone(tz_str: str) -> Union[ZoneInfo, datetime.timezone]:
78
77
  return datetime.timezone(offset)
79
78
  except ValueError:
80
79
  return ZoneInfo("UTC")
81
-
80
+
82
81
  try:
83
82
  return ZoneInfo(tz_str)
84
83
  except Exception:
@@ -87,13 +86,13 @@ def _get_timezone(tz_str: str) -> Union[ZoneInfo, datetime.timezone]:
87
86
 
88
87
  def _parse_offset(offset_str: str) -> tuple[int, int]:
89
88
  """Parse timezone offset string to hours and minutes.
90
-
89
+
91
90
  Args:
92
91
  offset_str: Offset string in format '+HH:MM' or '-HH:MM'
93
-
92
+
94
93
  Returns:
95
94
  Tuple of (hours, minutes)
96
-
95
+
97
96
  Raises:
98
97
  ValueError: If offset string is invalid, including:
99
98
  - String length < 6 characters
@@ -103,23 +102,22 @@ def _parse_offset(offset_str: str) -> tuple[int, int]:
103
102
  """
104
103
  if len(offset_str) < 6:
105
104
  raise ValueError(f"Invalid offset format: {offset_str}")
106
-
107
- if offset_str[3] != ':':
105
+
106
+ if offset_str[3] != ":":
108
107
  raise ValueError(f"Invalid offset format: {offset_str} (missing colon)")
109
-
110
- sign = 1 if offset_str[0] == '+' else -1
111
-
108
+
109
+ sign = 1 if offset_str[0] == "+" else -1
110
+
112
111
  try:
113
112
  hours = int(offset_str[1:3])
114
113
  minutes = int(offset_str[4:6])
115
114
  except ValueError as e:
116
115
  raise ValueError(f"Invalid offset format: {offset_str} (non-numeric values)") from e
117
-
116
+
118
117
  if hours < 0 or hours >= 24:
119
118
  raise ValueError(f"Invalid offset format: {offset_str} (hours must be 00-23)")
120
-
119
+
121
120
  if minutes < 0 or minutes >= 60:
122
121
  raise ValueError(f"Invalid offset format: {offset_str} (minutes must be 00-59)")
123
-
124
- return sign * hours, sign * minutes
125
122
 
123
+ return sign * hours, sign * minutes
starrocks_br/utils.py ADDED
@@ -0,0 +1,86 @@
1
def quote_identifier(identifier: str) -> str:
    """Quote a SQL identifier (database, table, or column name) with backticks.

    Backticks inside the identifier are doubled so the result is always one
    well-formed quoted identifier.

    Args:
        identifier: The database, table, or column name to quote.

    Returns:
        The identifier wrapped in backticks with internal backticks escaped.

    Raises:
        ValueError: If identifier is None or an empty string.
        TypeError: If identifier is not a string.

    Examples:
        >>> quote_identifier("my_table")
        '`my_table`'
        >>> quote_identifier("select")
        '`select`'
        >>> quote_identifier("table`with`ticks")
        '`table``with``ticks`'
    """
    if identifier is None:
        raise ValueError("Identifier cannot be None")

    # Fail with a clear message instead of an opaque AttributeError from
    # ``.replace`` when a non-string (int, bytes, ...) is passed by mistake.
    if not isinstance(identifier, str):
        raise TypeError(
            f"Identifier must be a string, got {type(identifier).__name__}"
        )

    if identifier == "":
        raise ValueError("Identifier cannot be empty")

    # Doubling is the escape for a backtick inside a backtick-quoted name.
    return "`" + identifier.replace("`", "``") + "`"
30
+
31
+
32
def quote_value(value: object) -> str:
    """Quote and escape a SQL string value for safe query interpolation.

    The value is converted with ``str()`` and wrapped in single quotes.
    Backslashes are doubled, single quotes are doubled, and newline, tab,
    carriage return and NUL characters are written as backslash escape
    sequences so no raw control character ends up inside the literal.

    Args:
        value: The value to quote and escape. ``None`` maps to SQL NULL.

    Returns:
        The quoted and escaped SQL literal, or the bare keyword ``NULL``
        when value is None.

    Examples:
        >>> quote_value("test")
        "'test'"
        >>> quote_value("O'Brien")
        "'O''Brien'"
        >>> quote_value(None)
        'NULL'
    """
    if value is None:
        return "NULL"

    # One translate pass replaces each source character exactly once, so the
    # backslashes introduced by an escape are never escaped a second time.
    escapes = str.maketrans(
        {
            "\\": "\\\\",
            "'": "''",
            "\n": "\\n",
            "\t": "\\t",
            "\r": "\\r",
            "\0": "\\0",
        }
    )
    return "'" + str(value).translate(escapes) + "'"
60
+
61
+
62
def build_qualified_table_name(database, table):
    """
    Return the backtick-quoted, dot-separated name for a table in a database.

    Args:
        database: Name of the database that owns the table
        table: Name of the table

    Returns:
        The qualified name in the form `database`.`table`

    Raises:
        ValueError: If database or table is None or empty

    Examples:
        >>> build_qualified_table_name("my_db", "my_table")
        '`my_db`.`my_table`'
    """
    # Validate both parts up front so the error names the offending argument.
    if not database:
        raise ValueError("Database name cannot be empty or None")
    if not table:
        raise ValueError("Table name cannot be empty or None")

    quoted_database = quote_identifier(database)
    quoted_table = quote_identifier(table)
    return ".".join((quoted_database, quoted_table))
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: starrocks-br
3
+ Version: 0.5.0
4
+ Summary: StarRocks Backup and Restore automation tool
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: click<9,>=8.1.7
8
+ Requires-Dist: PyYAML<7,>=6.0.1
9
+ Requires-Dist: mysql-connector-python<10,>=9.0.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest<9,>=8.3.2; extra == "dev"
12
+ Requires-Dist: pytest-mock<4,>=3.14.0; extra == "dev"
13
+ Requires-Dist: pytest-cov<6,>=5.0.0; extra == "dev"
14
+ Requires-Dist: ruff<1,>=0.8.0; extra == "dev"
15
+ Requires-Dist: pre-commit<5,>=4.0.0; extra == "dev"
16
+
17
+ # StarRocks Backup & Restore
18
+
19
+ Full and incremental backup automation for StarRocks shared-nothing clusters.
20
+
21
+ **Requirements:** StarRocks 3.5+ (shared-nothing mode)
22
+
23
+ 📋 **[Release Notes & Changelog](CHANGELOG.md)**
24
+
25
+ ## Table of Contents
26
+
27
+ - [Why This Tool?](#why-this-tool)
28
+ - [Documentation](#documentation)
29
+ - [Installation](#installation)
30
+ - [Configuration](#configuration)
31
+ - [Basic Usage](#basic-usage)
32
+ - [How It Works](#how-it-works)
33
+
34
+ ## Why This Tool?
35
+
36
+ StarRocks provides native `BACKUP` and `RESTORE` commands, but they only support full backups. For large-scale deployments hosting data at petabyte scale, full backups are not feasible due to time, storage, and network constraints.
37
+
38
+ This tool adds **incremental backup capabilities** to StarRocks by leveraging native partition-based backup features.
39
+
40
+ **What StarRocks doesn't provide:**
41
+ - ❌ **No incremental backups** - You must manually identify changed partitions and build complex backup commands
42
+ - ❌ **No backup history** - No built-in way to track what was backed up, when, or which backups succeeded/failed
43
+ - ❌ **No restore intelligence** - You manually determine which backups are needed for point-in-time recovery
44
+ - ❌ **No organization** - No way to group tables or manage different backup strategies
45
+ - ❌ **No concurrency control** - Multiple backup operations can conflict
46
+
47
+ **What this tool provides:**
48
+ - ✅ **Automatic incremental backups** - Tool detects changed partitions since the last full backup automatically
49
+ - ✅ **Complete operation tracking** - Every backup and restore is logged with status, timestamps, and error details
50
+ - ✅ **Intelligent restore** - Automatically resolves backup chains (full + incremental) for you
51
+ - ✅ **Inventory groups** - Organize tables into groups with different backup strategies
52
+ - ✅ **Job concurrency control** - Prevents conflicting operations
53
+ - ✅ **Safe restores** - Atomic rename mechanism prevents data loss during restore
54
+ - ✅ **Metadata management** - Dedicated `ops` database tracks all backup metadata and partition manifests
55
+
56
+ In short: this tool transforms StarRocks's basic backup/restore commands into a **production-ready incremental backup solution**.
57
+
58
+ ## Documentation
59
+
60
+ - **[Getting Started](docs/getting-started.md)** - Step-by-step tutorial
61
+ - **[Core Concepts](docs/core-concepts.md)** - Understand inventory groups, backup types, and restore chains
62
+ - **[Installation](docs/installation.md)** - All installation methods
63
+ - **[Configuration](docs/configuration.md)** - Config file reference and TLS setup
64
+ - **[Commands](docs/commands.md)** - Detailed command reference
65
+ - **[Scheduling & Monitoring](docs/scheduling.md)** - Automate backups and monitor status
66
+
67
+ ## Installation
68
+
69
+ ### Option 1: PyPI
70
+
71
+ ```bash
72
+ python3 -m venv .venv
73
+ source .venv/bin/activate
74
+ pip install starrocks-br
75
+ ```
76
+
77
+ ### Option 2: Standalone Executable
78
+
79
+ Download from [releases](https://github.com/deep-bi/starrocks-br/releases/latest):
80
+
81
+ ```bash
82
+ # Linux
83
+ chmod +x starrocks-br-linux-x86_64
84
+ mv starrocks-br-linux-x86_64 starrocks-br
85
+ ./starrocks-br --help
86
+ ```
87
+
88
+ See [Installation Guide](docs/installation.md) for all options.
89
+
90
+ ## Configuration
91
+
92
+ Create a `config.yaml` file pointing to your StarRocks cluster:
93
+
94
+ ```yaml
95
+ host: "127.0.0.1" # StarRocks FE node address
96
+ port: 9030 # MySQL protocol port
97
+ user: "root" # Database user with backup/restore privileges
98
+ database: "your_database" # Database containing tables to back up
99
+ repository: "your_repo_name" # Repository created via CREATE REPOSITORY in StarRocks
100
+ ```
101
+
102
+ Set password:
103
+ ```bash
104
+ export STARROCKS_PASSWORD="your_password"
105
+ ```
106
+
107
+ See [Configuration Reference](docs/configuration.md) for TLS and advanced options.
108
+
109
+ ## Basic Usage
110
+
111
+ **Initialize:**
112
+ ```bash
113
+ starrocks-br init --config config.yaml
114
+ ```
115
+
116
+ **Define inventory groups** (in StarRocks):
117
+ ```sql
118
+ INSERT INTO ops.table_inventory (inventory_group, database_name, table_name)
119
+ VALUES
120
+ ('production', 'mydb', 'users'),
121
+ ('production', 'mydb', 'orders');
122
+ ```
123
+
124
+ **Backup:**
125
+ ```bash
126
+ # Full backup
127
+ starrocks-br backup full --config config.yaml --group production
128
+
129
+ # Incremental backup (tool detects changed partitions automatically)
130
+ starrocks-br backup incremental --config config.yaml --group production
131
+ ```
132
+
133
+ **Restore:**
134
+ ```bash
135
+ # Tool automatically resolves backup chains
136
+ starrocks-br restore --config config.yaml --target-label mydb_20251118_full
137
+ ```
138
+
139
+ See [Commands Reference](docs/commands.md) for all options.
140
+
141
+ ## How It Works
142
+
143
+ 1. **Inventory Groups**: Define collections of tables that share the same backup strategy
144
+ 2. **ops Database**: Tool creates an `ops` database to track all operations and metadata
145
+ 3. **Automatic Incrementals**: Tool queries partition metadata and compares with the baseline to detect changes
146
+ 4. **Intelligent Restore**: Automatically resolves backup chains (full + incremental) for point-in-time recovery
147
+ 5. **Safe Operations**: All restores use temporary tables with atomic rename for safety
148
+
149
+ Read [Core Concepts](docs/core-concepts.md) for detailed explanations.
150
+
151
+ ## Contributing
152
+
153
+ We welcome contributions! See the open issues for areas that need help, or create a new issue to report a bug or request a feature.
@@ -0,0 +1,23 @@
1
+ starrocks_br/__init__.py,sha256=i1m0FIl2IAXaVyNoya0ZNAx3WfhIp9I6VLhTz06qNFY,28
2
+ starrocks_br/cli.py,sha256=fMtTLGFgEfM1HkXX5y0IVmTC6yCcGLfYnoA8G-qPWCs,21686
3
+ starrocks_br/concurrency.py,sha256=N0LD4VHTAFNhD4YslrkOCDSx5cnR5rCEkNH9MkODxv8,5903
4
+ starrocks_br/config.py,sha256=APqOZcJuUzYmGNHoJRlsu4l3sWl_4SS1kRLKjKm2Oag,2059
5
+ starrocks_br/db.py,sha256=47ynDQ9kdykJRj_nrHxX020b9njozzQxiZBI9lFdS7A,4946
6
+ starrocks_br/error_handler.py,sha256=qqN3Ht2YCHMzvnP_snPIPJuZPKxvgrHSt2qlVfItBY8,10830
7
+ starrocks_br/exceptions.py,sha256=vStzFWxDpO6krg1l-_6IxrXNKI8jc0aYSs7GiVsDze8,3273
8
+ starrocks_br/executor.py,sha256=YE12jiU-4tru2D7BAe8Y0Fom72LHjGz04obN4FcAWhA,11345
9
+ starrocks_br/health.py,sha256=rmkgNYf6kk3VDZx-PmnAG3lzmtvnJcUPG7Ppb6BA7IU,1021
10
+ starrocks_br/history.py,sha256=ewXMVUHJvpWjvPndYUdz9xPh24HDPiUAuJgIALuWays,2964
11
+ starrocks_br/labels.py,sha256=07UFd8BMyyV2MQwf7NaLviuu37lMLOOFX3DCbf_XqOE,1662
12
+ starrocks_br/logger.py,sha256=8F7ZnqCOVFJDt6-rZevh94udGbhZhDLrBw8W3RZbM-4,1432
13
+ starrocks_br/planner.py,sha256=wbOTKZvuWAFaGWXcciKOIveLzfYWsnGXK1ZI4lr7MVU,10892
14
+ starrocks_br/repository.py,sha256=gZgT0mAjs-AAdESXPF8Syv0bE8m5njya5leTageElQ8,1251
15
+ starrocks_br/restore.py,sha256=7_VcrGt0KVqhWe9f3JgQYDMNuM6_EjBqCi63BiSa2WY,20105
16
+ starrocks_br/schema.py,sha256=FSJjcz4q3SU_rHLptsSzrlm-o0dcvIu6LbpT-Z5GyZA,6199
17
+ starrocks_br/timezone.py,sha256=WlB_gkgI4AjQzqHVA1eG9CY_9QiX5cYpVKjQLvSrd4Y,3578
18
+ starrocks_br/utils.py,sha256=LF3uBdaNMeslE4UHl_wwv4QErCS48ISxpPqYX8dbrc8,2176
19
+ starrocks_br-0.5.0.dist-info/METADATA,sha256=kfTP9BWHOUoF7gvh8tgH2TufIRmWQasVGZgXw4GH5Ec,5728
20
+ starrocks_br-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ starrocks_br-0.5.0.dist-info/entry_points.txt,sha256=AKUt01G2MAlh85s1Q9kNQDOUio14kaTnT3dmg9gjdNg,54
22
+ starrocks_br-0.5.0.dist-info/top_level.txt,sha256=CU1tGVo0kjulhDr761Sndg-oTeRKsisDnWm8UG95aBE,13
23
+ starrocks_br-0.5.0.dist-info/RECORD,,